/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *stmt_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign)
{
  if (stmt_cost_vec)
    {
      tree vectype = stmt_vectype (stmt_info);
      add_stmt_info_to_vec (stmt_cost_vec, count, kind,
			    STMT_VINFO_STMT (stmt_info), misalign);
      return (unsigned)
	(targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign)
	 * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign);
    }
}
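
/* Usage sketch (illustrative, not from the original sources): a caller
   costing NCOPIES aligned vector stores would do

     inside_cost += record_stmt_cost (stmt_cost_vec, ncopies, vector_store,
				      stmt_info, 0);

   With a non-NULL STMT_COST_VEC the cost is queued for later processing;
   with a NULL STMT_COST_VEC it goes straight to the target hook via
   add_stmt_cost.  */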
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
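
/* For index N == 2, for example, this emits a single assignment of the
   form (illustrative):

     vectX = vect_array[2];

   i.e. a read from an ARRAY_REF whose SSA result VECT_NAME is returned to
   the caller.  */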
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
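
/* The reference built here is a MEM_REF with a zero offset, i.e. for a
   pointer PTR of array type it represents (illustrative):

     *(TYPE *) ptr

   with the alias pointer type taken from FIRST_DR so that the aliasing
   information of the original reference is preserved.  */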
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is out of pattern use, if LHS has other uses that are
	     pattern uses, we should mark the stmt itself, and not the pattern
	     stmt.  */
	  if (TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "last stmt in pattern. don't mark"
		     " relevant/live.");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
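
/* For example (illustrative):

     for (i = 0; i < N; i++)
       {
	 a[i] = b[i] + 1;    <-- relevant: the store alters memory
	 s = s + b[i];
       }
     ... = s;                <-- the def of s in the loop is "live"

   The store is relevant because it has a vdef; the reduction result s is
   live because it is used after the loop.  */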
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
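
/* For example (illustrative), given

     a[i] = x;

   the use of 'i' only indexes the array, so for USE == i this returns
   false, whereas the stored value 'x' is a real (non-indexing) operand
   and for USE == x this returns true.  */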
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, VEC(gimple,heap) **worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: phi relevant? ");
	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: stmt relevant? ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "worklist: examine stmt: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "unsupported use of reduction.");
	      VEC_free (gimple, heap, worklist);
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "unsupported use of nested cycle.");

	      VEC_free (gimple, heap, worklist);
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "unsupported use of double reduction.");

	      VEC_free (gimple, heap, worklist);
	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    {
		      VEC_free (gimple, heap, worklist);
		      return false;
		    }
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    {
		      VEC_free (gimple, heap, worklist);
		      return false;
		    }
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    {
		      VEC_free (gimple, heap, worklist);
		      return false;
		    }
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      {
		VEC_free (gimple, heap, worklist);
		return false;
	      }
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    {
	      VEC_free (gimple, heap, worklist);
	      return false;
	    }
	}
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt, slp_tree slp_node,
			stmt_vector_for_cost *stmt_cost_vec)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      outside_cost += vect_get_stmt_cost (vector_stmt);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (stmt_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
	     "outside_cost = %d .", inside_cost, outside_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, outside_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      outside_cost += vect_get_stmt_cost (vector_stmt);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
	     "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs in STMT_INFO.  */
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
}
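
/* Worked example (illustrative): with PWR == 1 (a two-step conversion),
   a promotion accumulates vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote stmts, while a demotion accumulates
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, reflecting that each extra
   step doubles the number of instructions required.  */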
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node, stmt_vector_for_cost *stmt_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (stmt_cost_vec, nstmts, vec_perm,
				      stmt_info, 0);

      if (vect_print_dump_info (REPORT_COST))
	fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
		 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, stmt_cost_vec);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
	     "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
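
/* Worked example (illustrative): an interleaved store group with
   GROUP_SIZE == 4 and NCOPIES == 1 that is not using store-lanes needs

     nstmts = 1 * exact_log2 (4) * 4 = 8

   vec_perm stmts (two interleave levels over four vectors) on top of the
   stores themselves.  */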
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *stmt_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  vector_store, stmt_info, 0);

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_store_cost: aligned.");

	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr));

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
		   "hardware.");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_store_cost: unsupported access.");

	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
		      slp_tree slp_node, stmt_vector_for_cost *stmt_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (stmt_cost_vec, nstmts, vec_perm,
				       stmt_info, 0);

      if (vect_print_dump_info (REPORT_COST))
	fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
		 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (stmt_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0);
      inside_cost += record_stmt_cost (stmt_cost_vec, ncopies, vec_construct,
				       stmt_info, 0);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &outside_cost, stmt_cost_vec);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
	     "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *outside_cost,
		    stmt_vector_for_cost *stmt_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  vector_load, stmt_info, 0);

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_load_cost: aligned.");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr));

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
		   "hardware.");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0);
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  vec_perm, stmt_info, 0);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   outside costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (stmt_cost_vec, 1, vector_stmt,
					    stmt_info, 0);

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_load_cost: explicit realign");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_load_cost: unaligned software "
		   "pipelined.");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost)
	  {
	    *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *outside_cost += vect_get_stmt_cost (vector_stmt);
	  }

	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  vector_load, stmt_info, 0);
	*inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
					  vec_perm, stmt_info, 0);

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump,
		   "vect_model_load_cost: explicit realign optimized");

	break;
      }
    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (vect_print_dump_info (REPORT_COST))
	  fprintf (vect_dump, "vect_model_load_cost: unsupported access.");

	break;
      }
    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_var = create_tmp_reg (TREE_TYPE (type), NULL);
	      add_referenced_var (new_var);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_var, val,
							NULL_TREE);
	      new_temp = make_ssa_name (new_var, init_stmt);
	      gimple_assign_set_lhs (init_stmt, new_temp);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
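
/* For example (illustrative), vectorizing an invariant use of the constant
   3 with a V4SI vector type inserts in the preheader something like:

     vect_cst_.5 = { 3, 3, 3, 3 };

   and returns the SSA name vect_cst_.5 for use in the vectorized stmt.  */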
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
	{
	  fprintf (vect_dump, "def = ");
	  print_generic_expr (vect_dump, def, TDF_SLIM);
	}
      if (def_stmt)
	{
	  fprintf (vect_dump, " def_stmt = ");
	  print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "Create vector_inv.");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 VEC(tree,heap) **vec_oprnds0,
				 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   VEC (tree, heap) **vec_oprnds0,
		   VEC (tree, heap) **vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
	VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
	*vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p,
						       vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
	{
	  *vec_oprnds1 = VEC_alloc (tree, heap, 1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
						   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

static tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}
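
/* Usage sketch (illustrative): for a call to a const builtin such as
   __builtin_sqrt in a loop vectorized with V2DF, the target hook may
   return the decl of a machine-specific vector sqrt builtin; the caller
   (vectorizable_call below) then emits a call to that decl instead of the
   scalar function.  Whether such a decl exists is entirely up to the
   target.  */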
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument types differ.");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &dt[i], &opvectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument vector types differ.");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "no vectype for scalar type ");
	  print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
	}

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs);
	  else
	    VEC_truncate (tree, vargs, 0);

	  if (slp_node)
	    {
	      VEC (slp_void_p, heap) *vec_defs
		= VEC_alloc (slp_void_p, heap, nargs);
	      VEC (tree, heap) *vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0
		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      VEC (tree, heap) *vec_oprndsk
			= (VEC (tree, heap) *)
			  VEC_index (slp_void_p, vec_defs, k);
		      VEC_replace (tree, vargs, k,
				   VEC_index (tree, vec_oprndsk, i));
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				  new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  VEC (tree, heap) *vec_oprndsi
		    = (VEC (tree, heap) *)
		      VEC_index (slp_void_p, vec_defs, i);
		  VEC_free (tree, heap, vec_oprndsi);
		}
	      VEC_free (slp_void_p, heap, vec_defs);
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs * 2);
	  else
	    VEC_truncate (tree, vargs, 0);

	  if (slp_node)
	    {
	      VEC (slp_void_p, heap) *vec_defs
		= VEC_alloc (slp_void_p, heap, nargs);
	      VEC (tree, heap) *vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0
		= (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
		   i += 2)
		{
		  size_t k;
		  VEC_truncate (tree, vargs, 0);
		  for (k = 0; k < nargs; k++)
		    {
		      VEC (tree, heap) *vec_oprndsk
			= (VEC (tree, heap) *)
			  VEC_index (slp_void_p, vec_defs, k);
		      VEC_quick_push (tree, vargs,
				      VEC_index (tree, vec_oprndsk, i));
		      VEC_quick_push (tree, vargs,
				      VEC_index (tree, vec_oprndsk, i + 1));
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				  new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  VEC (tree, heap) *vec_oprndsi
		    = (VEC (tree, heap) *)
		      VEC_index (slp_void_p, vec_defs, i);
		  VEC_free (tree, heap, vec_oprndsi);
		}
	      VEC_free (slp_void_p, heap, vec_defs);
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	      VEC_quick_push (tree, vargs, vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
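
/* Note on the modifier logic above (illustrative): if the output vector has
   twice as many elements as each input vector (nunits_in == nunits_out / 2),
   the call narrows and every output vector consumes two input vectors, which
   is why the NARROW case pushes two defs per scalar argument.  The WIDEN
   case (nunits_out == nunits_in / 2) is rejected, as no current target
   implements it.  */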
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
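
/* For illustration: when vectorizing a widening multiply of V8HI operands,
   the two halves produced by consecutive calls to this function would
   typically be

       vw_lo = VEC_WIDEN_MULT_LO_EXPR <va, vb>;
       vw_hi = VEC_WIDEN_MULT_HI_EXPR <va, vb>;

   each yielding a V4SI vector; CODE selects which half, and DECL is used
   instead when the target provides a builtin (CODE == CALL_EXPR).  */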
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
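
/* For example, when called with MULTI_STEP_CVT == 1 this function runs
   twice (one recursive step) and pushes four vector defs into VEC_OPRNDS,
   which is the number of input vectors a two-step narrowing needs, since
   each narrowing step halves the number of vectors.  */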
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  VEC_quick_push (tree, vec_dsts, vec_dest);
}
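
/* For illustration: demoting four V4SI vectors to one V16QI vector takes
   two rounds of pairwise packing:

       step 1:  w0 = VEC_PACK_TRUNC <v0, v1>;  w1 = VEC_PACK_TRUNC <v2, v3>;
       step 2:  r  = VEC_PACK_TRUNC <w0, w1>;

   The first round uses CODE as passed in; the recursive rounds always use
   VEC_PACK_TRUNC_EXPR, as can be seen in the recursive call above.  */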
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  VEC (tree, heap) *vec_tmp = NULL;

  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      VEC_quick_push (tree, vec_tmp, new_tmp1);
      VEC_quick_push (tree, vec_tmp, new_tmp2);
    }

  VEC_free (tree, heap, *vec_oprnds0);
  *vec_oprnds0 = vec_tmp;
}
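
/* Note that promotion doubles the number of vectors: each input vector in
   VEC_OPRNDS0 contributes a lo half and a hi half, so on return VEC_OPRNDS0
   holds twice as many (twice as wide) vectors, ready to be fed into the
   next widening step by the caller.  */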
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &decl1, &decl2, &code1, &code2,
                                          &multi_step_cvt, &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &decl1, &decl2,
                                                    &codecvt1, &codecvt2,
                                                    &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, NULL, NULL, &code1,
                                              &code2, &multi_step_cvt,
                                              &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          VEC_safe_push (tree, heap, interm_types, cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      VEC_free (tree, heap, interm_types);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == NONE)
        vec_oprnds0 = VEC_alloc (tree, heap, 1);
      else if (modifier == WIDEN)
        {
          vec_oprnds0 = VEC_alloc (tree, heap,
                                   (multi_step_cvt
                                    ? vect_pow2 (multi_step_cvt) : 1));
          if (op_type == binary_op)
            vec_oprnds1 = VEC_alloc (tree, heap, 1);
        }
      else
        vec_oprnds0 = VEC_alloc (tree, heap,
                                 2 * (multi_step_cvt
                                      ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              VEC_truncate (tree, vec_oprnds0, 0);
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  VEC_truncate (tree, vec_oprnds1, 0);
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = VEC_index (tree, vec_dsts, i);
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              VEC_truncate (tree, vec_oprnds0, 0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                VEC_replace (tree, vec_oprnds0, i, new_temp);
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, interm_types);

  return true;
}
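
/* For illustration of the multi-step WIDEN path above: on a target without
   a direct V4SI -> V2DF conversion, (double) of an int operand may first be
   widened V4SI -> 2 x V2DI with unpack-lo/hi codes (CODE1/CODE2 found for
   NOP_EXPR) and each V2DI then converted to V2DF by the FLOAT_EXPR recorded
   in CODECVT1 -- this is exactly what the cvt_type machinery arranges.  */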
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type conversion to/from bit-precision "
                 "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
       {
         if (CONVERT_EXPR_CODE_P (code)
             || code == VIEW_CONVERT_EXPR)
           vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
         new_stmt = gimple_build_assign (vec_dest, vop);
         new_temp = make_ssa_name (vec_dest, new_stmt);
         gimple_assign_set_lhs (new_stmt, new_temp);
         vect_finish_stmt_generation (stmt, new_stmt, gsi);
         if (slp_node)
           VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
       }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{

  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
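
/* Note that the scalar-shift optab is queried before the vector-shift one:
   when every element is shifted by the same amount, an insn taking a scalar
   shift operand is typically cheaper, and the vector/vector form is only a
   fallback.  */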
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "operand mode requires invariant argument.");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "vector/vector shift/rotate found.");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unusable type for last operand in"
                                " vector/vector shift/rotate.");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        fprintf (vect_dump, "unusable type for last operand in"
                                            " vector/vector shift/rotate.");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = VEC_index (tree, vec_oprnds1, i);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  return true;
}
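
/* For example, for 'x >> 3' with an invariant shift amount, operand 2 is
   either used as the scalar 3 directly (when the insn pattern takes a
   scalar operand 2, see the optab_op2_mode check above) or first splat to
   the vector {3,3,3,3} via vect_init_vector for the vector/vector form.  */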
static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
                 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision arithmetic not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab.");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op || op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds2 = VEC_alloc (tree, heap, 1);
    }

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2 = VEC_alloc (tree, heap, 1);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_operand (op2, stmt, NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_stmt_copy (dt[2],
                                                              vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
                                                    vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);
  if (vec_oprnds2)
    VEC_free (tree, heap, vec_oprnds2);

  return true;
}
3728 /* Function vectorizable_store.
3730 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3732 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3733 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3734 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3737 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3743 tree vec_oprnd
= NULL_TREE
;
3744 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3745 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3746 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3748 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3749 struct loop
*loop
= NULL
;
3750 enum machine_mode vec_mode
;
3752 enum dr_alignment_support alignment_support_scheme
;
3755 enum vect_def_type dt
;
3756 stmt_vec_info prev_stmt_info
= NULL
;
3757 tree dataref_ptr
= NULL_TREE
;
3758 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3761 gimple next_stmt
, first_stmt
= NULL
;
3762 bool grouped_store
= false;
3763 bool store_lanes_p
= false;
3764 unsigned int group_size
, i
;
3765 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3767 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3768 bool slp
= (slp_node
!= NULL
);
3769 unsigned int vec_num
;
3770 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3774 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3776 /* Multiple types in SLP are handled by creating the appropriate number of
3777 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3779 if (slp
|| PURE_SLP_STMT (stmt_info
))
3782 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3784 gcc_assert (ncopies
>= 1);
3786 /* FORNOW. This restriction should be relaxed. */
3787 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3789 if (vect_print_dump_info (REPORT_DETAILS
))
3790 fprintf (vect_dump
, "multiple types in nested loop.");
3794 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3797 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3800 /* Is vectorizable store? */
3802 if (!is_gimple_assign (stmt
))
3805 scalar_dest
= gimple_assign_lhs (stmt
);
3806 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3807 && is_pattern_stmt_p (stmt_info
))
3808 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3809 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3810 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3811 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3812 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3813 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3814 && TREE_CODE (scalar_dest
) != MEM_REF
)
3817 gcc_assert (gimple_assign_single_p (stmt
));
3818 op
= gimple_assign_rhs1 (stmt
);
3819 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3822 if (vect_print_dump_info (REPORT_DETAILS
))
3823 fprintf (vect_dump
, "use not simple.");
3827 elem_type
= TREE_TYPE (vectype
);
3828 vec_mode
= TYPE_MODE (vectype
);
3830 /* FORNOW. In some cases can vectorize even if data-type not supported
3831 (e.g. - array initialization with 0). */
3832 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3835 if (!STMT_VINFO_DATA_REF (stmt_info
))
3838 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3839 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3840 size_zero_node
) < 0)
3842 if (vect_print_dump_info (REPORT_DETAILS
))
3843 fprintf (vect_dump
, "negative step for store.");
3847 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3849 grouped_store
= true;
3850 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3851 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3853 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3854 if (vect_store_lanes_supported (vectype
, group_size
))
3855 store_lanes_p
= true;
3856 else if (!vect_grouped_store_supported (vectype
, group_size
))
3860 if (first_stmt
== stmt
)
3862 /* STMT is the leader of the group. Check the operands of all the
3863 stmts of the group. */
3864 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3867 gcc_assert (gimple_assign_single_p (next_stmt
));
3868 op
= gimple_assign_rhs1 (next_stmt
);
3869 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3870 &def_stmt
, &def
, &dt
))
3872 if (vect_print_dump_info (REPORT_DETAILS
))
3873 fprintf (vect_dump
, "use not simple.");
3876 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3881 if (!vec_stmt
) /* transformation not required. */
3883 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3884 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
, NULL
, NULL
);
3892 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3893 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3895 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3898 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3900 /* We vectorize all the stmts of the interleaving group when we
3901 reach the last stmt in the group. */
3902 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3903 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3912 grouped_store
= false;
3913 /* VEC_NUM is the number of vect stmts to be created for this
3915 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3916 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3917 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3918 op
= gimple_assign_rhs1 (first_stmt
);
3921 /* VEC_NUM is the number of vect stmts to be created for this
3923 vec_num
= group_size
;
3929 group_size
= vec_num
= 1;
3932 if (vect_print_dump_info (REPORT_DETAILS
))
3933 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3935 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3936 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3938 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3939 gcc_assert (alignment_support_scheme
);
3940 /* Targets with store-lane instructions must not require explicit
3942 gcc_assert (!store_lanes_p
3943 || alignment_support_scheme
== dr_aligned
3944 || alignment_support_scheme
== dr_unaligned_supported
);
3947 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
3949 aggr_type
= vectype
;
3951 /* In case the vectorization factor (VF) is bigger than the number
3952 of elements that we can fit in a vectype (nunits), we have to generate
3953 more than one vector stmt - i.e - we need to "unroll" the
3954 vector stmt by a factor VF/nunits. For more details see documentation in
3955 vect_get_vec_def_for_copy_stmt. */
3957 /* In case of interleaving (non-unit grouped access):
3964 We create vectorized stores starting from base address (the access of the
3965 first stmt in the chain (S2 in the above example), when the last store stmt
3966 of the chain (S4) is reached:
3969 VS2: &base + vec_size*1 = vx0
3970 VS3: &base + vec_size*2 = vx1
3971 VS4: &base + vec_size*3 = vx3
3973 Then permutation statements are generated:
3975 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3976 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3979 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3980 (the order of the data-refs in the output of vect_permute_store_chain
3981 corresponds to the order of scalar stmts in the interleaving chain - see
3982 the documentation of vect_permute_store_chain()).
3984 In case of both multiple types and interleaving, above vector stores and
3985 permutation stmts are created for every copy. The result vector stmts are
3986 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3987 STMT_VINFO_RELATED_STMT for the next copies.
3990 prev_stmt_info
= NULL
;
3991 for (j
= 0; j
< ncopies
; j
++)
4000 /* Get vectorized arguments for SLP_NODE. */
4001 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4002 NULL
, slp_node
, -1);
4004 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
4008 /* For interleaved stores we collect vectorized defs for all the
4009 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4010 used as an input to vect_permute_store_chain(), and OPRNDS as
4011 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4013 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4014 OPRNDS are of size 1. */
4015 next_stmt
= first_stmt
;
4016 for (i
= 0; i
< group_size
; i
++)
4018 /* Since gaps are not supported for interleaved stores,
4019 GROUP_SIZE is the exact number of stmts in the chain.
4020 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4021 there is no interleaving, GROUP_SIZE is 1, and only one
4022 iteration of the loop will be executed. */
4023 gcc_assert (next_stmt
4024 && gimple_assign_single_p (next_stmt
));
4025 op
= gimple_assign_rhs1 (next_stmt
);
4027 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4029 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
4030 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
4031 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4035 /* We should have catched mismatched types earlier. */
4036 gcc_assert (useless_type_conversion_p (vectype
,
4037 TREE_TYPE (vec_oprnd
)));
4038 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
4039 NULL_TREE
, &dummy
, gsi
,
4040 &ptr_incr
, false, &inv_p
);
4041 gcc_assert (bb_vinfo
|| !inv_p
);
4045 /* For interleaved stores we created vectorized defs for all the
4046 defs stored in OPRNDS in the previous iteration (previous copy).
4047 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4048 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4050 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4051 OPRNDS are of size 1. */
4052 for (i
= 0; i
< group_size
; i
++)
4054 op
= VEC_index (tree
, oprnds
, i
);
4055 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4057 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4058 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
4059 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
4061 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4062 TYPE_SIZE_UNIT (aggr_type
));
      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = VEC_index (tree, dr_chain, i);
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
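      /* For instance, on targets whose store-lanes instructions
         interleave N vectors while storing (e.g. ARM NEON vst2/vst3/vst4),
         a group of three stores is emitted as (a sketch):

             VEC_ARRAY[0] = vx0;
             VEC_ARRAY[1] = vx1;
             VEC_ARRAY[2] = vx2;
             MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY);

         so no separate VEC_PERM_EXPR statements are needed - the
         instruction itself performs the interleaving.  */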
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              result_chain = VEC_alloc (tree, heap, group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = VEC_index (tree, vec_oprnds, i);
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = VEC_index (tree, result_chain, i);

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 build_int_cst (reference_alias_ptr_type
                                                (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                      misalign);

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }

      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);
  if (vec_oprnds)
    VEC_free (tree, heap, vec_oprnds);

  return true;
}
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
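/* For example (a sketch, assuming a 4-element vector type), a caller can
   request a mask that swaps adjacent pairs of elements:

       unsigned char sel[4] = { 1, 0, 3, 2 };
       tree mask = vect_gen_perm_mask (vectype, sel);

   MASK is NULL if the target cannot perform the permutation, and
   otherwise is the VECTOR_CST {1, 0, 3, 2}, suitable as the third
   operand of a VEC_PERM_EXPR.  */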
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
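/* E.g. for a vector type with nunits == 4 the selector built above is
   {3, 2, 1, 0}, so

       VEC_PERM_EXPR <v, v, {3, 2, 1, 0}>

   yields {v[3], v[2], v[1], v[0]} - the element reversal used when
   vectorizing negative-step accesses.  */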
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
                                             x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
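/* The generated statement has the form

       perm_dest_N = VEC_PERM_EXPR <x, y, mask_vec>;

   e.g. with X = {a0, a1, a2, a3}, Y = {b0, b1, b2, b3} and
   MASK_VEC = {0, 4, 1, 5} the result is {a0, b0, a1, b1}.  */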
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  VEC(tree,heap) *dr_chain = NULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  tree stride_base, stride_step;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;

      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "gather index use not simple.");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      if (!vect_check_strided_load (stmt, loop_vinfo,
                                    &stride_base, &stride_step))
        return false;
    }
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple types with negative step.");
          return false;
        }

      if (negative)
        {
          gcc_assert (!grouped_load);
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "negative step but alignment required.");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump,
                         "negative step and reversing not supported.");
              return false;
            }
        }
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);

  /** Transform.  **/
  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else
        gcc_unreachable ();
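      /* As an illustration of the two selectors above (a sketch): if the
         loaded vector has nunits == 4 and the offset vector has
         gather_off_nunits == 8 (WIDEN), sel is {4, 5, 6, 7, 4, 5, 6, 7}
         (i | nunits), which brings the high half of the offset vector
         into the low positions for the odd copies.  If instead
         nunits == 8 and gather_off_nunits == 4 (NARROW), sel is
         {0, 1, 2, 3, 8, 9, 10, 11}, which concatenates the low halves of
         two gather results into one full vector.  */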
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
      mask = build_vector_from_val (masktype, mask);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              add_referenced_var (var);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              add_referenced_var (var);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      tree ref = DR_REF (dr);
      VEC(constructor_elt, gc) *v = NULL;
      gimple_seq stmts = NULL;

      gcc_assert (stride_base && stride_step);

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...  */
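      /* For instance (a sketch), with stride == 3 and VF == 4 one copy
         gathers

             vectemp = {array[j], array[j + 3], array[j + 6], array[j + 9]}

         and the new induction variable advances by VF*stride == 12
         elements per vector iteration.  */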
      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true,
                                          NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          v = VEC_alloc (constructor_elt, gc, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              if (TREE_CODE (ref) == ARRAY_REF)
                newref = build4 (ARRAY_REF, TREE_TYPE (ref),
                                 unshare_expr (TREE_OPERAND (ref, 0)),
                                 running_off,
                                 NULL_TREE, NULL_TREE);
              else
                newref = build2 (MEM_REF, TREE_TYPE (ref),
                                 running_off,
                                 TREE_OPERAND (ref, 1));

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = SSA_NAME_VAR (running_off);
              if (POINTER_TYPE_P (TREE_TYPE (newoff)))
                incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
                                                     running_off, stride_step);
              else
                incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
                                                     running_off, stride_step);
              newoff = make_ssa_name (newoff, incr);
              gimple_assign_set_lhs (incr, newoff);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
          && first_stmt != VEC_index (gimple,
                                      SLP_TREE_SCALAR_STMTS (slp_node), 0))
        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
            slp_perm = true;
        }
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
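  /* Concretely (a sketch with symbolic values), for a chain of two loads
     and 4-element vectors:

         vx0 = {a0, b0, a1, b1}
         vx1 = {a2, b2, a3, b3}

         VS5: vx5 = VEC_PERM_EXPR <vx0, vx1, {0, 2, 4, 6}> = {a0, a1, a2, a3}
         VS6: vx6 = VEC_PERM_EXPR <vx0, vx1, {1, 3, 5, 7}> = {b0, b1, b2, b3}

     i.e. the even/odd extraction de-interleaves the group back into one
     vector per scalar load.  */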
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
         p = p + indx * vectype_size;
         vec_dest = *(p);
         indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
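  /* As an illustration of the realignment scheme (a sketch): with
     4-element vectors and a pointer p1 misaligned by one element, the
     two aligned loads straddle the desired data,

         msq = *(floor(p1))        = { x, d0, d1, d2}
         lsq = *(floor(p1 + VS-1)) = {d3, d4, d5, d6}

     and realign_load shifts their concatenation so that
     vec_dest = {d0, d1, d2, d3}.  Since LSQ of one iteration is reused
     as MSQ of the next, only one new vector load is needed per
     iteration.  */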
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                                at_loop, offset, &dummy, gsi,
                                                &ptr_incr, false, &inv_p);
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain = VEC_alloc (tree, heap, vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              VEC_quick_push (tree, dr_chain, new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = build2 (MEM_REF, vectype, dataref_ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        align = TYPE_ALIGN_UNIT (elem_type);
                        misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                            align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;
                    tree vs_minus_1;

                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
                                  build_int_cst
                                    (TREE_TYPE (dataref_ptr),
                                     -(HOST_WIDE_INT)
                                       TYPE_ALIGN_UNIT (vectype)));
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs_minus_1,
                                       TYPE_SIZE_UNIT (elem_type));
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
                                  build_int_cst
                                    (TREE_TYPE (ptr),
                                     -(HOST_WIDE_INT)
                                       TYPE_ALIGN_UNIT (vectype)));
                    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  new_stmt = gimple_build_assign_with_ops
                               (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
                                build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
                                            new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest,
                                                          vectype);
                  new_stmt
                    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
                                                     vec_dest, msq, lsq,
                                                     realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme
                      == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION, NULL);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gimple_stmt_iterator gsi2 = *gsi;
                  gcc_assert (!grouped_load);
                  gsi_next (&gsi2);
                  new_temp = vect_init_vector (stmt, scalar_dest,
                                               vectype, &gsi2);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                VEC_quick_push (tree, dr_chain, new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              VEC_free (tree, heap, dr_chain);
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      if (dr_chain)
        VEC_free (tree, heap, dr_chain);
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
                     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
                                 &lhs_def_stmt, &def, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
                                 &rhs_def_stmt, &def, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
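/* For example, a scalar statement

       x_5 = a_1 < b_2 ? c_3 : d_4;

   becomes (one statement per copy)

       vx_5 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;

   where the comparison is performed element-wise and each result element
   is taken from vc_3 or vd_4 according to the corresponding comparison
   result.  */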
bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
                            &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
      vec_oprnds2 = VEC_alloc (tree, heap, 1);
      vec_oprnds3 = VEC_alloc (tree, heap, 1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
              VEC (slp_void_p, heap) *vec_defs;

              vec_defs = VEC_alloc (slp_void_p, heap, 4);
              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
              VEC_safe_push (tree, heap, ops, then_clause);
              VEC_safe_push (tree, heap, ops, else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3
                = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
              vec_oprnds2
                = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
              vec_oprnds1
                = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);

              VEC_free (tree, heap, ops);
              VEC_free (slp_void_p, heap, vec_defs);
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                                VEC_pop (tree, vec_oprnds0));
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                                VEC_pop (tree, vec_oprnds1));
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                VEC_pop (tree, vec_oprnds2));
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                VEC_pop (tree, vec_oprnds3));
        }

      if (!slp_node)
        {
          VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
          VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
          VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
          VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
          vec_then_clause = VEC_index (tree, vec_oprnds2, i);
          vec_else_clause = VEC_index (tree, vec_oprnds3, i);

          vec_compare = build2 (TREE_CODE (cond_expr), vectype,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_oprnds2);
  VEC_free (tree, heap, vec_oprnds3);

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining pattern statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
        }
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "irrelevant.");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> examining pattern statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
   }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump,
                           "==> examining pattern def statement: ");
                  print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
                                     TDF_SLIM);
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                  || relevance == vect_used_in_outer_by_reduction
                  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "get vectype for scalar type: ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "not SLPed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vectype: ");
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: live stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                           vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop,
                                      gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;
  GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
/* Create a hash table for stmt_vec_info. */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}

/* Free hash table for stmt_vec_info. */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                free_stmt_vec_info (gsi_stmt (si));
            }
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
        }
    }

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type
      = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                        TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
      && !INTEGRAL_TYPE_P (scalar_type)
      && !POINTER_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and
   operands that are defined by the current iteration of the loop.
   Unsupportable operands are those that are defined by a previous
   iteration of the loop (as is the case in reduction/induction
   computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
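/* For instance, in

       for (i = 0; i < n; i++)
         {
           s_1 = PHI <s_0, s_2>
           x_3 = a[i];
           s_2 = s_1 + x_3;
         }

   x_3 is a simple use defined by the current iteration, and the
   loop-invariant s_0 is a simple external use, whereas s_1, being
   defined by the previous iteration through the PHI, is only handled
   via the special reduction machinery.  */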
6083 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6084 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6085 tree
*def
, enum vect_def_type
*dt
)
6088 stmt_vec_info stmt_vinfo
;
6089 struct loop
*loop
= NULL
;
6092 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6097 if (vect_print_dump_info (REPORT_DETAILS
))
6099 fprintf (vect_dump
, "vect_is_simple_use: operand ");
6100 print_generic_expr (vect_dump
, operand
, TDF_SLIM
);
6103 if (CONSTANT_CLASS_P (operand
))
6105 *dt
= vect_constant_def
;
6109 if (is_gimple_min_invariant (operand
))
6112 *dt
= vect_external_def
;
6116 if (TREE_CODE (operand
) == PAREN_EXPR
)
6118 if (vect_print_dump_info (REPORT_DETAILS
))
6119 fprintf (vect_dump
, "non-associatable copy.");
6120 operand
= TREE_OPERAND (operand
, 0);
6123 if (TREE_CODE (operand
) != SSA_NAME
)
6125 if (vect_print_dump_info (REPORT_DETAILS
))
6126 fprintf (vect_dump
, "not ssa-name.");
6130 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6131 if (*def_stmt
== NULL
)
6133 if (vect_print_dump_info (REPORT_DETAILS
))
6134 fprintf (vect_dump
, "no def_stmt.");
6138 if (vect_print_dump_info (REPORT_DETAILS
))
6140 fprintf (vect_dump
, "def_stmt: ");
6141 print_gimple_stmt (vect_dump
, *def_stmt
, 0, TDF_SLIM
);
6144 /* Empty stmt is expected only in case of a function argument.
6145 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6146 if (gimple_nop_p (*def_stmt
))
6149 *dt
= vect_external_def
;
6153 bb
= gimple_bb (*def_stmt
);
6155 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6156 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6157 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6158 *dt
= vect_external_def
;
6161 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6162 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6165 if (*dt
== vect_unknown_def_type
6167 && *dt
== vect_double_reduction_def
6168 && gimple_code (stmt
) != GIMPLE_PHI
))
6170 if (vect_print_dump_info (REPORT_DETAILS
))
6171 fprintf (vect_dump
, "Unsupported pattern.");
6175 if (vect_print_dump_info (REPORT_DETAILS
))
6176 fprintf (vect_dump
, "type of def: %d.",*dt
);
6178 switch (gimple_code (*def_stmt
))
6181 *def
= gimple_phi_result (*def_stmt
);
6185 *def
= gimple_assign_lhs (*def_stmt
);
6189 *def
= gimple_call_lhs (*def_stmt
);
6194 if (vect_print_dump_info (REPORT_DETAILS
))
6195 fprintf (vect_dump
, "unsupported defining stmt: ");
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
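/* A minimal sketch (hypothetical helper, not compiled) of the division
   of labor described above: internal defs come back with a vector type;
   constants and externals come back NULL_TREE and the caller picks one
   from the scalar type.  */
#if 0
static tree
example_operand_vectype (tree op, gimple use_stmt, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def, vectype;
  enum vect_def_type dt;

  if (!vect_is_simple_use_1 (op, use_stmt, loop_vinfo, NULL,
                             &def_stmt, &def, &dt, &vectype))
    return NULL_TREE;

  /* For vect_constant_def / vect_external_def VECTYPE is NULL_TREE,
     so derive one from the operand's scalar type.  */
  if (vectype == NULL_TREE)
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

  return vectype;
}
#endif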
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                tree *decl1 ATTRIBUTE_UNUSED,
                                tree *decl2 ATTRIBUTE_UNUSED,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             NULL, NULL, code1, code2,
                                             multi_step_cvt, interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}
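/* A minimal sketch (plain scalar C, not compiled) of the two result
   orders discussed in the WIDEN_MULT_EXPR comment above, for 8 short
   products per vector iteration.  */
#if 0
static void
example_widen_mult_orders (const short *x, const short *y,
                           int lo_hi[8], int even_odd[8])
{
  int i;

  /* LO/HI order: results keep the scalar order across the two result
     vectors, vect1 = {p0,p1,p2,p3}, vect2 = {p4,p5,p6,p7}.  */
  for (i = 0; i < 8; i++)
    lo_hi[i] = (int) x[i] * y[i];

  /* EVEN/ODD order, as a {mult_even,mult_odd} target produces it:
     vect1 = {p0,p2,p4,p6}, vect2 = {p1,p3,p5,p7}.  */
  for (i = 0; i < 4; i++)
    {
      even_odd[i] = (int) x[2 * i] * y[2 * i];              /* even lanes */
      even_odd[4 + i] = (int) x[2 * i + 1] * y[2 * i + 1];  /* odd lanes  */
    }

  /* A sum reduction over all eight products gives the same value either
     way, which is why the reordering is permitted for reductions.  */
}
#endif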
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != NULL
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}
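/* A minimal sketch (plain scalar C, not compiled) of the multi-step
   narrowing int->short->char described above: MULTI_STEP_CVT == 1 and
   the single intermediate type is short.  Each VEC_PACK_TRUNC step
   halves the element size, combining two source vectors into one.  */
#if 0
static void
example_two_step_narrow (const int *src, signed char *dst, int n)
{
  int i;

  for (i = 0; i < n; i++)
    {
      short mid = (short) src[i];      /* step 1: int -> short  */
      dst[i] = (signed char) mid;      /* step 2: short -> char */
    }
}
#endif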