/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside any pattern.  If LHS has other uses that
             are pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);

              if (vinfo_for_stmt (use_stmt)
                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                {
                  found = true;
                  break;
                }
            }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                     " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              for (i = 1; i < gimple_num_ops (stmt); i++)
                {
                  tree op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}

/* Get the cost of one statement of kind TYPE_OF_COST by calling the
   target's builtin_vectorization_cost hook.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}

/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */
781
782static int
783vect_cost_strided_group_size (stmt_vec_info stmt_info)
784{
e14c1050 785 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a
IR
786
787 if (first_stmt == STMT_VINFO_STMT (stmt_info))
e14c1050 788 return GROUP_SIZE (stmt_info);
ebfd146a
IR
789
790 return 1;
791}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);

    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                          vectype,
                                                          DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
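  /* As with stores, the formula below charges log2 (GROUP_SIZE) * GROUP_SIZE
     extract operations per copy, e.g. 8 extracts per copy when
     GROUP_SIZE == 4.  */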
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and an odd extract operation for each needed
         permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
                      ((!STMT_VINFO_STRIDED_ACCESS (stmt_info))
                       || group_size > 1 || slp_node),
                      &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                          vectype,
                                                          DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at GSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type, op);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1, taking them from SLP_NODE
   if it is not NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in types VECTYPE_OUT and VECTYPE_IN.
   Returns a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
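  /* Compare the element counts of the input and output vector types: twice
     as many elements in the output type means a narrowing call (two input
     vectors per operand are consumed by each call), half as many means a
     widening call, and equal counts need no modifier.  */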
b690cc0f
RG
1581 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1582 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
1583 if (nunits_in == nunits_out / 2)
1584 modifier = NARROW;
1585 else if (nunits_out == nunits_in)
1586 modifier = NONE;
1587 else if (nunits_out == nunits_in / 2)
1588 modifier = WIDEN;
1589 else
1590 return false;
1591
1592 /* For now, we only vectorize functions if a target specific builtin
1593 is available. TODO -- in some cases, it might be profitable to
1594 insert the calls for pieces of the vector, in order to be able
1595 to vectorize other operations in the loop. */
1596 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1597 if (fndecl == NULL_TREE)
1598 {
1599 if (vect_print_dump_info (REPORT_DETAILS))
1600 fprintf (vect_dump, "function is not vectorizable.");
1601
1602 return false;
1603 }
1604
5006671f 1605 gcc_assert (!gimple_vuse (stmt));
ebfd146a
IR
1606
1607 if (modifier == NARROW)
1608 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1609 else
1610 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1611
1612 /* Sanity check: make sure that at least one copy of the vectorized stmt
1613 needs to be generated. */
1614 gcc_assert (ncopies >= 1);
1615
1616 if (!vec_stmt) /* transformation not required. */
1617 {
1618 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1619 if (vect_print_dump_info (REPORT_DETAILS))
1620 fprintf (vect_dump, "=== vectorizable_call ===");
1621 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1622 return true;
1623 }
1624
1625 /** Transform. **/
1626
1627 if (vect_print_dump_info (REPORT_DETAILS))
9d5e7640 1628 fprintf (vect_dump, "transform call.");
ebfd146a
IR
1629
1630 /* Handle def. */
1631 scalar_dest = gimple_call_lhs (stmt);
1632 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1633
1634 prev_stmt_info = NULL;
1635 switch (modifier)
1636 {
1637 case NONE:
1638 for (j = 0; j < ncopies; ++j)
1639 {
1640 /* Build argument list for the vectorized call. */
1641 if (j == 0)
1642 vargs = VEC_alloc (tree, heap, nargs);
1643 else
1644 VEC_truncate (tree, vargs, 0);
1645
1646 for (i = 0; i < nargs; i++)
1647 {
1648 op = gimple_call_arg (stmt, i);
1649 if (j == 0)
1650 vec_oprnd0
1651 = vect_get_vec_def_for_operand (op, stmt, NULL);
1652 else
63827fb8
IR
1653 {
1654 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1655 vec_oprnd0
1656 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1657 }
ebfd146a
IR
1658
1659 VEC_quick_push (tree, vargs, vec_oprnd0);
1660 }
1661
1662 new_stmt = gimple_build_call_vec (fndecl, vargs);
1663 new_temp = make_ssa_name (vec_dest, new_stmt);
1664 gimple_call_set_lhs (new_stmt, new_temp);
1665
1666 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7411b8f0 1667 mark_symbols_for_renaming (new_stmt);
ebfd146a
IR
1668
1669 if (j == 0)
1670 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1671 else
1672 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1673
1674 prev_stmt_info = vinfo_for_stmt (new_stmt);
1675 }
1676
1677 break;
1678
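    /* Schematically, for ncopies == 2 the NONE case above emits (VFN
       being a hypothetical name for the target builtin returned by
       vectorizable_function):

         vect_x0 = VFN (vect_a0);
         vect_x1 = VFN (vect_a1);

       with the copies chained through STMT_VINFO_RELATED_STMT so that
       later stmts can find the per-copy defs.  */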
1679 case NARROW:
1680 for (j = 0; j < ncopies; ++j)
1681 {
1682 /* Build argument list for the vectorized call. */
1683 if (j == 0)
1684 vargs = VEC_alloc (tree, heap, nargs * 2);
1685 else
1686 VEC_truncate (tree, vargs, 0);
1687
1688 for (i = 0; i < nargs; i++)
1689 {
1690 op = gimple_call_arg (stmt, i);
1691 if (j == 0)
1692 {
1693 vec_oprnd0
1694 = vect_get_vec_def_for_operand (op, stmt, NULL);
1695 vec_oprnd1
63827fb8 1696 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
1697 }
1698 else
1699 {
336ecb65 1700 vec_oprnd1 = gimple_call_arg (new_stmt, 2 * i + 1);
ebfd146a 1701 vec_oprnd0
63827fb8 1702 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 1703 vec_oprnd1
63827fb8 1704 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
1705 }
1706
1707 VEC_quick_push (tree, vargs, vec_oprnd0);
1708 VEC_quick_push (tree, vargs, vec_oprnd1);
1709 }
1710
1711 new_stmt = gimple_build_call_vec (fndecl, vargs);
1712 new_temp = make_ssa_name (vec_dest, new_stmt);
1713 gimple_call_set_lhs (new_stmt, new_temp);
1714
1715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7411b8f0 1716 mark_symbols_for_renaming (new_stmt);
ebfd146a
IR
1717
1718 if (j == 0)
1719 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1720 else
1721 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1722
1723 prev_stmt_info = vinfo_for_stmt (new_stmt);
1724 }
1725
1726 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1727
1728 break;
1729
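    /* In the NARROW case above each vector call takes two vector defs
       per scalar argument, e.g. (vect_a0, vect_a1) in the first copy
       and (vect_a2, vect_a3) in the next, and produces one narrowed
       output vector per call.  */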
1730 case WIDEN:
1731 /* No current target implements this case. */
1732 return false;
1733 }
1734
1735 VEC_free (tree, heap, vargs);
1736
1737 /* Update the exception handling table with the vector stmt if necessary. */
1738 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1739 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1740
1741 /* The call in STMT might prevent it from being removed in dce.
1742 We however cannot remove it here, due to the way the ssa name
 1743 it defines is mapped to the new definition. So just replace the
 1744 rhs of the statement with something harmless. */
1745
1746 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
1747 if (is_pattern_stmt_p (stmt_info))
1748 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1749 else
1750 lhs = gimple_call_lhs (stmt);
1751 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a
IR
1752 set_vinfo_for_stmt (new_stmt, stmt_info);
1753 set_vinfo_for_stmt (stmt, NULL);
1754 STMT_VINFO_STMT (stmt_info) = new_stmt;
1755 gsi_replace (gsi, new_stmt, false);
1756 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1757
1758 return true;
1759}
1760
1761
1762/* Function vect_gen_widened_results_half
1763
 1764 Create a vector stmt whose code, number of arguments, and result
b8698a0f 1765 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 1766 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
1767 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1768 needs to be created (DECL is a function-decl of a target-builtin).
1769 STMT is the original scalar stmt that we are vectorizing. */
1770
1771static gimple
1772vect_gen_widened_results_half (enum tree_code code,
1773 tree decl,
1774 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1775 tree vec_dest, gimple_stmt_iterator *gsi,
1776 gimple stmt)
b8698a0f 1777{
ebfd146a 1778 gimple new_stmt;
b8698a0f
L
1779 tree new_temp;
1780
1781 /* Generate half of the widened result: */
1782 if (code == CALL_EXPR)
1783 {
1784 /* Target specific support */
ebfd146a
IR
1785 if (op_type == binary_op)
1786 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1787 else
1788 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1789 new_temp = make_ssa_name (vec_dest, new_stmt);
1790 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
1791 }
1792 else
ebfd146a 1793 {
b8698a0f
L
1794 /* Generic support */
1795 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
1796 if (op_type != binary_op)
1797 vec_oprnd1 = NULL;
1798 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1799 vec_oprnd1);
1800 new_temp = make_ssa_name (vec_dest, new_stmt);
1801 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 1802 }
ebfd146a
IR
1803 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1804
ebfd146a
IR
1805 return new_stmt;
1806}
1807
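/* For instance, a widening conversion from V8HI to V4SI is emitted as two
   such halves, one per output vector; the code pair (e.g.
   VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR) or the builtin decls come from
   supportable_widening_operation.  */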
1808
b8698a0f
L
1809/* Check if STMT performs a conversion operation, that can be vectorized.
1810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1811 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1813
1814static bool
1815vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1816 gimple *vec_stmt, slp_tree slp_node)
1817{
1818 tree vec_dest;
1819 tree scalar_dest;
1820 tree op0;
1821 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1822 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1823 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1824 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1825 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1826 tree new_temp;
1827 tree def;
1828 gimple def_stmt;
1829 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1830 gimple new_stmt = NULL;
1831 stmt_vec_info prev_stmt_info;
1832 int nunits_in;
1833 int nunits_out;
1834 tree vectype_out, vectype_in;
1835 int ncopies, j;
b690cc0f 1836 tree rhs_type;
ebfd146a
IR
1837 tree builtin_decl;
1838 enum { NARROW, NONE, WIDEN } modifier;
1839 int i;
1840 VEC(tree,heap) *vec_oprnds0 = NULL;
1841 tree vop0;
ebfd146a
IR
1842 VEC(tree,heap) *dummy = NULL;
1843 int dummy_int;
1844
1845 /* Is STMT a vectorizable conversion? */
1846
a70d6342
IR
1847 /* FORNOW: unsupported in basic block SLP. */
1848 gcc_assert (loop_vinfo);
b8698a0f 1849
ebfd146a
IR
1850 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1851 return false;
1852
8644a673 1853 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1854 return false;
1855
1856 if (!is_gimple_assign (stmt))
1857 return false;
1858
1859 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1860 return false;
1861
1862 code = gimple_assign_rhs_code (stmt);
1863 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1864 return false;
1865
1866 /* Check types of lhs and rhs. */
b690cc0f
RG
1867 scalar_dest = gimple_assign_lhs (stmt);
1868 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1869
ebfd146a
IR
1870 op0 = gimple_assign_rhs1 (stmt);
1871 rhs_type = TREE_TYPE (op0);
b690cc0f
RG
1872 /* Check the operands of the operation. */
1873 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1874 &def_stmt, &def, &dt[0], &vectype_in))
1875 {
1876 if (vect_print_dump_info (REPORT_DETAILS))
1877 fprintf (vect_dump, "use not simple.");
1878 return false;
1879 }
 1880 /* If op0 is an external or constant def, use a vector type of
1881 the same size as the output vector type. */
ebfd146a 1882 if (!vectype_in)
b690cc0f 1883 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
1884 if (vec_stmt)
1885 gcc_assert (vectype_in);
1886 if (!vectype_in)
1887 {
1888 if (vect_print_dump_info (REPORT_DETAILS))
1889 {
1890 fprintf (vect_dump, "no vectype for scalar type ");
1891 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1892 }
1893
1894 return false;
1895 }
ebfd146a
IR
1896
1897 /* FORNOW */
b690cc0f
RG
1898 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1899 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
1900 if (nunits_in == nunits_out / 2)
1901 modifier = NARROW;
1902 else if (nunits_out == nunits_in)
1903 modifier = NONE;
1904 else if (nunits_out == nunits_in / 2)
1905 modifier = WIDEN;
1906 else
1907 return false;
1908
ebfd146a
IR
1909 if (modifier == NARROW)
1910 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1911 else
1912 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1913
ff802fa1
IR
1914 /* Multiple types in SLP are handled by creating the appropriate number of
1915 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1916 case of SLP. */
437f4a00 1917 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 1918 ncopies = 1;
b8698a0f 1919
ebfd146a
IR
1920 /* Sanity check: make sure that at least one copy of the vectorized stmt
1921 needs to be generated. */
1922 gcc_assert (ncopies >= 1);
1923
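  /* For example, FLOAT_EXPR from V4SI to V4SF is NONE; FIX_TRUNC_EXPR
     from V2DF to V4SI is NARROW (two input vectors are packed into each
     output vector); FLOAT_EXPR from V4SI to V2DF is WIDEN.  */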
ebfd146a
IR
1924 /* Supportable by target? */
1925 if ((modifier == NONE
88dd7150 1926 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
ebfd146a 1927 || (modifier == WIDEN
b690cc0f
RG
1928 && !supportable_widening_operation (code, stmt,
1929 vectype_out, vectype_in,
ebfd146a
IR
1930 &decl1, &decl2,
1931 &code1, &code2,
1932 &dummy_int, &dummy))
1933 || (modifier == NARROW
b690cc0f 1934 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
ebfd146a
IR
1935 &code1, &dummy_int, &dummy)))
1936 {
1937 if (vect_print_dump_info (REPORT_DETAILS))
1938 fprintf (vect_dump, "conversion not supported by target.");
1939 return false;
1940 }
1941
1942 if (modifier != NONE)
1943 {
ebfd146a
IR
1944 /* FORNOW: SLP not supported. */
1945 if (STMT_SLP_TYPE (stmt_info))
b8698a0f 1946 return false;
ebfd146a
IR
1947 }
1948
1949 if (!vec_stmt) /* transformation not required. */
1950 {
1951 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1952 return true;
1953 }
1954
1955 /** Transform. **/
1956 if (vect_print_dump_info (REPORT_DETAILS))
1957 fprintf (vect_dump, "transform conversion.");
1958
1959 /* Handle def. */
1960 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1961
1962 if (modifier == NONE && !slp_node)
1963 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1964
1965 prev_stmt_info = NULL;
1966 switch (modifier)
1967 {
1968 case NONE:
1969 for (j = 0; j < ncopies; j++)
1970 {
ebfd146a 1971 if (j == 0)
b8698a0f 1972 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
1973 else
1974 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1975
1976 builtin_decl =
88dd7150
RG
1977 targetm.vectorize.builtin_conversion (code,
1978 vectype_out, vectype_in);
ac47786e 1979 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
b8698a0f 1980 {
ebfd146a
IR
 1981 /* Arguments are ready. Create the new vector stmt. */
1982 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1983 new_temp = make_ssa_name (vec_dest, new_stmt);
1984 gimple_call_set_lhs (new_stmt, new_temp);
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1986 if (slp_node)
1987 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1988 }
1989
1990 if (j == 0)
1991 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1992 else
1993 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1994 prev_stmt_info = vinfo_for_stmt (new_stmt);
1995 }
1996 break;
1997
1998 case WIDEN:
1999 /* In case the vectorization factor (VF) is bigger than the number
2000 of elements that we can fit in a vectype (nunits), we have to
 2001 generate more than one vector stmt -- i.e., we need to "unroll"
2002 the vector stmt by a factor VF/nunits. */
2003 for (j = 0; j < ncopies; j++)
2004 {
2005 if (j == 0)
2006 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2007 else
2008 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2009
ebfd146a
IR
2010 /* Generate first half of the widened result: */
2011 new_stmt
b8698a0f 2012 = vect_gen_widened_results_half (code1, decl1,
ebfd146a
IR
2013 vec_oprnd0, vec_oprnd1,
2014 unary_op, vec_dest, gsi, stmt);
2015 if (j == 0)
2016 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2017 else
2018 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2019 prev_stmt_info = vinfo_for_stmt (new_stmt);
2020
2021 /* Generate second half of the widened result: */
2022 new_stmt
2023 = vect_gen_widened_results_half (code2, decl2,
2024 vec_oprnd0, vec_oprnd1,
2025 unary_op, vec_dest, gsi, stmt);
2026 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2027 prev_stmt_info = vinfo_for_stmt (new_stmt);
2028 }
2029 break;
2030
2031 case NARROW:
2032 /* In case the vectorization factor (VF) is bigger than the number
2033 of elements that we can fit in a vectype (nunits), we have to
 2034 generate more than one vector stmt -- i.e., we need to "unroll"
2035 the vector stmt by a factor VF/nunits. */
2036 for (j = 0; j < ncopies; j++)
2037 {
2038 /* Handle uses. */
2039 if (j == 0)
2040 {
2041 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2042 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2043 }
2044 else
2045 {
2046 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
2047 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2048 }
2049
9dc3f7de 2050 /* Arguments are ready. Create the new vector stmt. */
ebfd146a
IR
2051 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
2052 vec_oprnd1);
2053 new_temp = make_ssa_name (vec_dest, new_stmt);
2054 gimple_assign_set_lhs (new_stmt, new_temp);
2055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2056
2057 if (j == 0)
2058 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2059 else
2060 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2061
2062 prev_stmt_info = vinfo_for_stmt (new_stmt);
2063 }
2064
2065 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2066 }
2067
2068 if (vec_oprnds0)
b8698a0f 2069 VEC_free (tree, heap, vec_oprnds0);
ebfd146a
IR
2070
2071 return true;
2072}
ff802fa1
IR
2073
2074
ebfd146a
IR
2075/* Function vectorizable_assignment.
2076
b8698a0f
L
2077 Check if STMT performs an assignment (copy) that can be vectorized.
2078 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2079 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2080 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2081
2082static bool
2083vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2084 gimple *vec_stmt, slp_tree slp_node)
2085{
2086 tree vec_dest;
2087 tree scalar_dest;
2088 tree op;
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2091 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2092 tree new_temp;
2093 tree def;
2094 gimple def_stmt;
2095 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 2096 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 2097 int ncopies;
f18b55bd 2098 int i, j;
ebfd146a
IR
2099 VEC(tree,heap) *vec_oprnds = NULL;
2100 tree vop;
a70d6342 2101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
2102 gimple new_stmt = NULL;
2103 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
2104 enum tree_code code;
2105 tree vectype_in;
ebfd146a
IR
2106
2107 /* Multiple types in SLP are handled by creating the appropriate number of
2108 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2109 case of SLP. */
437f4a00 2110 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
2111 ncopies = 1;
2112 else
2113 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2114
2115 gcc_assert (ncopies >= 1);
ebfd146a 2116
a70d6342 2117 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2118 return false;
2119
8644a673 2120 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2121 return false;
2122
2123 /* Is vectorizable assignment? */
2124 if (!is_gimple_assign (stmt))
2125 return false;
2126
2127 scalar_dest = gimple_assign_lhs (stmt);
2128 if (TREE_CODE (scalar_dest) != SSA_NAME)
2129 return false;
2130
fde9c428 2131 code = gimple_assign_rhs_code (stmt);
ebfd146a 2132 if (gimple_assign_single_p (stmt)
fde9c428
RG
2133 || code == PAREN_EXPR
2134 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
2135 op = gimple_assign_rhs1 (stmt);
2136 else
2137 return false;
2138
7b7ec6c5
RG
2139 if (code == VIEW_CONVERT_EXPR)
2140 op = TREE_OPERAND (op, 0);
2141
fde9c428
RG
2142 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2143 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a
IR
2144 {
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "use not simple.");
2147 return false;
2148 }
2149
fde9c428
RG
2150 /* We can handle NOP_EXPR conversions that do not change the number
2151 of elements or the vector size. */
7b7ec6c5
RG
2152 if ((CONVERT_EXPR_CODE_P (code)
2153 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
2154 && (!vectype_in
2155 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2156 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2157 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2158 return false;
2159
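  /* For example, a copy from int to unsigned int keeps both the element
     count and the vector size, so it is vectorized below as a
     VIEW_CONVERT_EXPR of the whole vector, whereas a conversion from int
     to short changes the element count and is rejected here (it is
     handled by vectorizable_type_demotion instead).  */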
ebfd146a
IR
2160 if (!vec_stmt) /* transformation not required. */
2161 {
2162 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "=== vectorizable_assignment ===");
2165 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2166 return true;
2167 }
2168
2169 /** Transform. **/
2170 if (vect_print_dump_info (REPORT_DETAILS))
2171 fprintf (vect_dump, "transform assignment.");
2172
2173 /* Handle def. */
2174 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2175
2176 /* Handle use. */
f18b55bd 2177 for (j = 0; j < ncopies; j++)
ebfd146a 2178 {
f18b55bd
IR
2179 /* Handle uses. */
2180 if (j == 0)
2181 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2182 else
2183 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2184
 2185 /* Arguments are ready. Create the new vector stmt. */
ac47786e 2186 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
f18b55bd 2187 {
7b7ec6c5
RG
2188 if (CONVERT_EXPR_CODE_P (code)
2189 || code == VIEW_CONVERT_EXPR)
4a73490d 2190 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
2191 new_stmt = gimple_build_assign (vec_dest, vop);
2192 new_temp = make_ssa_name (vec_dest, new_stmt);
2193 gimple_assign_set_lhs (new_stmt, new_temp);
2194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2195 if (slp_node)
2196 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2197 }
ebfd146a
IR
2198
2199 if (slp_node)
f18b55bd
IR
2200 continue;
2201
2202 if (j == 0)
2203 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2204 else
2205 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2206
2207 prev_stmt_info = vinfo_for_stmt (new_stmt);
2208 }
b8698a0f
L
2209
2210 VEC_free (tree, heap, vec_oprnds);
ebfd146a
IR
2211 return true;
2212}
2213
9dc3f7de 2214
1107f3ae
IR
2215/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2216 either as shift by a scalar or by a vector. */
2217
2218bool
2219vect_supportable_shift (enum tree_code code, tree scalar_type)
2220{
2221
2222 enum machine_mode vec_mode;
2223 optab optab;
2224 int icode;
2225 tree vectype;
2226
2227 vectype = get_vectype_for_scalar_type (scalar_type);
2228 if (!vectype)
2229 return false;
2230
2231 optab = optab_for_tree_code (code, vectype, optab_scalar);
2232 if (!optab
2233 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2234 {
2235 optab = optab_for_tree_code (code, vectype, optab_vector);
2236 if (!optab
2237 || (optab_handler (optab, TYPE_MODE (vectype))
2238 == CODE_FOR_nothing))
2239 return false;
2240 }
2241
2242 vec_mode = TYPE_MODE (vectype);
2243 icode = (int) optab_handler (optab, vec_mode);
2244 if (icode == CODE_FOR_nothing)
2245 return false;
2246
2247 return true;
2248}
2249
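/* A caller such as the pattern recognizer can use this predicate before
   synthesizing a shift, along the lines of (ITYPE being the scalar type
   of the operand; usage sketch only):

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ... build the replacement shift stmt ...  */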
2250
9dc3f7de
IR
2251/* Function vectorizable_shift.
2252
2253 Check if STMT performs a shift operation that can be vectorized.
2254 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2255 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2256 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2257
2258static bool
2259vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2260 gimple *vec_stmt, slp_tree slp_node)
2261{
2262 tree vec_dest;
2263 tree scalar_dest;
2264 tree op0, op1 = NULL;
2265 tree vec_oprnd1 = NULL_TREE;
2266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2267 tree vectype;
2268 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2269 enum tree_code code;
2270 enum machine_mode vec_mode;
2271 tree new_temp;
2272 optab optab;
2273 int icode;
2274 enum machine_mode optab_op2_mode;
2275 tree def;
2276 gimple def_stmt;
2277 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2278 gimple new_stmt = NULL;
2279 stmt_vec_info prev_stmt_info;
2280 int nunits_in;
2281 int nunits_out;
2282 tree vectype_out;
2283 int ncopies;
2284 int j, i;
2285 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2286 tree vop0, vop1;
2287 unsigned int k;
49eab32e 2288 bool scalar_shift_arg = true;
9dc3f7de
IR
2289 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2290 int vf;
2291
2292 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2293 return false;
2294
2295 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2296 return false;
2297
2298 /* Is STMT a vectorizable binary/unary operation? */
2299 if (!is_gimple_assign (stmt))
2300 return false;
2301
2302 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2303 return false;
2304
2305 code = gimple_assign_rhs_code (stmt);
2306
2307 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2308 || code == RROTATE_EXPR))
2309 return false;
2310
2311 scalar_dest = gimple_assign_lhs (stmt);
2312 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2313
2314 op0 = gimple_assign_rhs1 (stmt);
2315 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2316 &def_stmt, &def, &dt[0], &vectype))
2317 {
2318 if (vect_print_dump_info (REPORT_DETAILS))
2319 fprintf (vect_dump, "use not simple.");
2320 return false;
2321 }
 2322 /* If op0 is an external or constant def, use a vector type with
2323 the same size as the output vector type. */
2324 if (!vectype)
2325 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2326 if (vec_stmt)
2327 gcc_assert (vectype);
2328 if (!vectype)
2329 {
2330 if (vect_print_dump_info (REPORT_DETAILS))
2331 {
2332 fprintf (vect_dump, "no vectype for scalar type ");
2333 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2334 }
2335
2336 return false;
2337 }
2338
2339 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2340 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2341 if (nunits_out != nunits_in)
2342 return false;
2343
2344 op1 = gimple_assign_rhs2 (stmt);
2345 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2346 {
2347 if (vect_print_dump_info (REPORT_DETAILS))
2348 fprintf (vect_dump, "use not simple.");
2349 return false;
2350 }
2351
2352 if (loop_vinfo)
2353 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2354 else
2355 vf = 1;
2356
2357 /* Multiple types in SLP are handled by creating the appropriate number of
2358 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2359 case of SLP. */
437f4a00 2360 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
2361 ncopies = 1;
2362 else
2363 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2364
2365 gcc_assert (ncopies >= 1);
2366
 2367 /* Determine whether the shift amount is a vector or a scalar. If the
2368 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2369
49eab32e
JJ
2370 if (dt[1] == vect_internal_def && !slp_node)
2371 scalar_shift_arg = false;
2372 else if (dt[1] == vect_constant_def
2373 || dt[1] == vect_external_def
2374 || dt[1] == vect_internal_def)
2375 {
 2376 /* In SLP, we need to check whether the shift count is the same in
 2377 all the statements; in loops, a constant or invariant count is
 2378 always a scalar shift. */
2379 if (slp_node)
2380 {
2381 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2382 gimple slpstmt;
2383
2384 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
2385 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
2386 scalar_shift_arg = false;
2387 }
2388 }
2389 else
2390 {
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "operand mode requires invariant argument.");
2393 return false;
2394 }
2395
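  /* For example, b[i] << 3 or a shift by a loop invariant leaves
     scalar_shift_arg true, while b[i] << c[i] makes the shift count a
     vect_internal_def and forces the vector/vector form below.  */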
9dc3f7de 2396 /* Vector shifted by vector. */
49eab32e 2397 if (!scalar_shift_arg)
9dc3f7de
IR
2398 {
2399 optab = optab_for_tree_code (code, vectype, optab_vector);
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "vector/vector shift/rotate found.");
2402 }
 2403 /* See if the machine has a vector shifted by scalar insn and, if not,
 2404 whether it has a vector shifted by vector insn. */
49eab32e 2405 else
9dc3f7de
IR
2406 {
2407 optab = optab_for_tree_code (code, vectype, optab_scalar);
2408 if (optab
2409 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2410 {
9dc3f7de
IR
2411 if (vect_print_dump_info (REPORT_DETAILS))
2412 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2413 }
2414 else
2415 {
2416 optab = optab_for_tree_code (code, vectype, optab_vector);
2417 if (optab
2418 && (optab_handler (optab, TYPE_MODE (vectype))
2419 != CODE_FOR_nothing))
2420 {
49eab32e
JJ
2421 scalar_shift_arg = false;
2422
9dc3f7de
IR
2423 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "vector/vector shift/rotate found.");
2425
 2426 /* Unlike the other binary operators, shifts/rotates have an int rhs
 2427 instead of one of the same type as the lhs, so make sure the
 2428 scalar is the right type if we are dealing with vectors of
 2429 short/char. */
2430 if (dt[1] == vect_constant_def)
2431 op1 = fold_convert (TREE_TYPE (vectype), op1);
2432 }
2433 }
2434 }
9dc3f7de
IR
2435
2436 /* Supportable by target? */
2437 if (!optab)
2438 {
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "no optab.");
2441 return false;
2442 }
2443 vec_mode = TYPE_MODE (vectype);
2444 icode = (int) optab_handler (optab, vec_mode);
2445 if (icode == CODE_FOR_nothing)
2446 {
2447 if (vect_print_dump_info (REPORT_DETAILS))
2448 fprintf (vect_dump, "op not supported by target.");
2449 /* Check only during analysis. */
2450 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2451 || (vf < vect_min_worthwhile_factor (code)
2452 && !vec_stmt))
2453 return false;
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "proceeding using word mode.");
2456 }
2457
2458 /* Worthwhile without SIMD support? Check only during analysis. */
2459 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2460 && vf < vect_min_worthwhile_factor (code)
2461 && !vec_stmt)
2462 {
2463 if (vect_print_dump_info (REPORT_DETAILS))
2464 fprintf (vect_dump, "not worthwhile without SIMD support.");
2465 return false;
2466 }
2467
2468 if (!vec_stmt) /* transformation not required. */
2469 {
2470 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2471 if (vect_print_dump_info (REPORT_DETAILS))
2472 fprintf (vect_dump, "=== vectorizable_shift ===");
2473 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2474 return true;
2475 }
2476
2477 /** Transform. **/
2478
2479 if (vect_print_dump_info (REPORT_DETAILS))
2480 fprintf (vect_dump, "transform binary/unary operation.");
2481
2482 /* Handle def. */
2483 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2484
2485 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2486 created in the previous stages of the recursion, so no allocation is
2487 needed, except for the case of shift with scalar shift argument. In that
2488 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2489 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
 2490 In case of loop-based vectorization we allocate VECs of size 1. Since
 2491 a shift is always a binary operation, both VECs are allocated here. */
2492 if (!slp_node)
2493 {
2494 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2495 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2496 }
2497 else if (scalar_shift_arg)
2498 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2499
2500 prev_stmt_info = NULL;
2501 for (j = 0; j < ncopies; j++)
2502 {
2503 /* Handle uses. */
2504 if (j == 0)
2505 {
2506 if (scalar_shift_arg)
2507 {
2508 /* Vector shl and shr insn patterns can be defined with scalar
2509 operand 2 (shift operand). In this case, use constant or loop
2510 invariant op1 directly, without extending it to vector mode
2511 first. */
2512 optab_op2_mode = insn_data[icode].operand[2].mode;
2513 if (!VECTOR_MODE_P (optab_op2_mode))
2514 {
2515 if (vect_print_dump_info (REPORT_DETAILS))
2516 fprintf (vect_dump, "operand 1 using scalar mode.");
2517 vec_oprnd1 = op1;
2518 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2519 if (slp_node)
2520 {
2521 /* Store vec_oprnd1 for every vector stmt to be created
2522 for SLP_NODE. We check during the analysis that all
2523 the shift arguments are the same.
2524 TODO: Allow different constants for different vector
2525 stmts generated for an SLP instance. */
2526 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2527 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2528 }
2529 }
2530 }
2531
2532 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2533 (a special case for certain kind of vector shifts); otherwise,
2534 operand 1 should be of a vector type (the usual case). */
2535 if (vec_oprnd1)
2536 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2537 slp_node);
2538 else
2539 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2540 slp_node);
2541 }
2542 else
2543 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2544
2545 /* Arguments are ready. Create the new vector stmt. */
2546 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2547 {
2548 vop1 = VEC_index (tree, vec_oprnds1, i);
2549 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2550 new_temp = make_ssa_name (vec_dest, new_stmt);
2551 gimple_assign_set_lhs (new_stmt, new_temp);
2552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2553 if (slp_node)
2554 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2555 }
2556
2557 if (slp_node)
2558 continue;
2559
2560 if (j == 0)
2561 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2562 else
2563 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2564 prev_stmt_info = vinfo_for_stmt (new_stmt);
2565 }
2566
2567 VEC_free (tree, heap, vec_oprnds0);
2568 VEC_free (tree, heap, vec_oprnds1);
2569
2570 return true;
2571}
2572
2573
ebfd146a
IR
2574/* Function vectorizable_operation.
2575
16949072
RG
2576 Check if STMT performs a binary, unary or ternary operation that can
2577 be vectorized.
b8698a0f 2578 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2579 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2580 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2581
2582static bool
2583vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2584 gimple *vec_stmt, slp_tree slp_node)
2585{
2586 tree vec_dest;
2587 tree scalar_dest;
16949072 2588 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 2589 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b690cc0f 2590 tree vectype;
ebfd146a
IR
2591 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2592 enum tree_code code;
2593 enum machine_mode vec_mode;
2594 tree new_temp;
2595 int op_type;
2596 optab optab;
2597 int icode;
ebfd146a
IR
2598 tree def;
2599 gimple def_stmt;
16949072
RG
2600 enum vect_def_type dt[3]
2601 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
2602 gimple new_stmt = NULL;
2603 stmt_vec_info prev_stmt_info;
b690cc0f 2604 int nunits_in;
ebfd146a
IR
2605 int nunits_out;
2606 tree vectype_out;
2607 int ncopies;
2608 int j, i;
16949072
RG
2609 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2610 tree vop0, vop1, vop2;
a70d6342
IR
2611 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2612 int vf;
2613
a70d6342 2614 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2615 return false;
2616
8644a673 2617 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2618 return false;
2619
2620 /* Is STMT a vectorizable binary/unary operation? */
2621 if (!is_gimple_assign (stmt))
2622 return false;
2623
2624 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2625 return false;
2626
ebfd146a
IR
2627 code = gimple_assign_rhs_code (stmt);
2628
2629 /* For pointer addition, we should use the normal plus for
2630 the vector addition. */
2631 if (code == POINTER_PLUS_EXPR)
2632 code = PLUS_EXPR;
2633
2634 /* Support only unary or binary operations. */
2635 op_type = TREE_CODE_LENGTH (code);
16949072 2636 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a
IR
2637 {
2638 if (vect_print_dump_info (REPORT_DETAILS))
16949072
RG
2639 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2640 op_type);
ebfd146a
IR
2641 return false;
2642 }
2643
b690cc0f
RG
2644 scalar_dest = gimple_assign_lhs (stmt);
2645 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2646
ebfd146a 2647 op0 = gimple_assign_rhs1 (stmt);
b690cc0f
RG
2648 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2649 &def_stmt, &def, &dt[0], &vectype))
ebfd146a
IR
2650 {
2651 if (vect_print_dump_info (REPORT_DETAILS))
2652 fprintf (vect_dump, "use not simple.");
2653 return false;
2654 }
b690cc0f
RG
 2655 /* If op0 is an external or constant def, use a vector type with
2656 the same size as the output vector type. */
2657 if (!vectype)
2658 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
2659 if (vec_stmt)
2660 gcc_assert (vectype);
2661 if (!vectype)
2662 {
2663 if (vect_print_dump_info (REPORT_DETAILS))
2664 {
2665 fprintf (vect_dump, "no vectype for scalar type ");
2666 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2667 }
2668
2669 return false;
2670 }
b690cc0f
RG
2671
2672 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2673 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2674 if (nunits_out != nunits_in)
2675 return false;
ebfd146a 2676
16949072 2677 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
2678 {
2679 op1 = gimple_assign_rhs2 (stmt);
b8698a0f 2680 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 2681 &dt[1]))
ebfd146a
IR
2682 {
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "use not simple.");
2685 return false;
2686 }
2687 }
16949072
RG
2688 if (op_type == ternary_op)
2689 {
2690 op2 = gimple_assign_rhs3 (stmt);
2691 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2692 &dt[2]))
2693 {
2694 if (vect_print_dump_info (REPORT_DETAILS))
2695 fprintf (vect_dump, "use not simple.");
2696 return false;
2697 }
2698 }
ebfd146a 2699
b690cc0f
RG
2700 if (loop_vinfo)
2701 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2702 else
2703 vf = 1;
2704
2705 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 2706 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 2707 case of SLP. */
437f4a00 2708 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
2709 ncopies = 1;
2710 else
2711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2712
2713 gcc_assert (ncopies >= 1);
2714
9dc3f7de 2715 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
2716 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2717 || code == RROTATE_EXPR)
9dc3f7de 2718 return false;
ebfd146a 2719
16949072 2720 optab = optab_for_tree_code (code, vectype, optab_default);
ebfd146a
IR
2721
2722 /* Supportable by target? */
2723 if (!optab)
2724 {
2725 if (vect_print_dump_info (REPORT_DETAILS))
2726 fprintf (vect_dump, "no optab.");
2727 return false;
2728 }
2729 vec_mode = TYPE_MODE (vectype);
947131ba 2730 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
2731 if (icode == CODE_FOR_nothing)
2732 {
2733 if (vect_print_dump_info (REPORT_DETAILS))
2734 fprintf (vect_dump, "op not supported by target.");
2735 /* Check only during analysis. */
2736 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
a70d6342 2737 || (vf < vect_min_worthwhile_factor (code)
ebfd146a
IR
2738 && !vec_stmt))
2739 return false;
2740 if (vect_print_dump_info (REPORT_DETAILS))
2741 fprintf (vect_dump, "proceeding using word mode.");
2742 }
2743
ff802fa1 2744 /* Worthwhile without SIMD support? Check only during analysis. */
ebfd146a 2745 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
a70d6342 2746 && vf < vect_min_worthwhile_factor (code)
ebfd146a
IR
2747 && !vec_stmt)
2748 {
2749 if (vect_print_dump_info (REPORT_DETAILS))
2750 fprintf (vect_dump, "not worthwhile without SIMD support.");
2751 return false;
2752 }
2753
2754 if (!vec_stmt) /* transformation not required. */
2755 {
2756 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2757 if (vect_print_dump_info (REPORT_DETAILS))
2758 fprintf (vect_dump, "=== vectorizable_operation ===");
2759 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2760 return true;
2761 }
2762
2763 /** Transform. **/
2764
2765 if (vect_print_dump_info (REPORT_DETAILS))
2766 fprintf (vect_dump, "transform binary/unary operation.");
2767
2768 /* Handle def. */
2769 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2770
ff802fa1 2771 /* Allocate VECs for vector operands. In case of SLP, vector operands are
ebfd146a 2772 created in the previous stages of the recursion, so no allocation is
ff802fa1 2773 needed. (Shifts with a scalar shift argument are handled in
ebfd146a
IR
 2774 vectorizable_shift.) In case of loop-based vectorization we allocate
 2775 VECs of size 1. We allocate VEC_OPRNDS1 in case of a binary or
ff802fa1 2776 ternary operation, and VEC_OPRNDS2 only in case of a ternary
b8698a0f 2777 operation. */
ebfd146a
IR
2778 if (!slp_node)
2779 {
2780 vec_oprnds0 = VEC_alloc (tree, heap, 1);
16949072 2781 if (op_type == binary_op || op_type == ternary_op)
ebfd146a 2782 vec_oprnds1 = VEC_alloc (tree, heap, 1);
16949072
RG
2783 if (op_type == ternary_op)
2784 vec_oprnds2 = VEC_alloc (tree, heap, 1);
ebfd146a 2785 }
ebfd146a
IR
2786
2787 /* In case the vectorization factor (VF) is bigger than the number
2788 of elements that we can fit in a vectype (nunits), we have to generate
 2789 more than one vector stmt -- i.e., we need to "unroll" the
ff802fa1 2790 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 2791 from one copy of the vector stmt to the next, in the field
ff802fa1 2792 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 2793 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
2794 stmts that use the defs of the current stmt. The example below
2795 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2796 we need to create 4 vectorized stmts):
ebfd146a
IR
2797
2798 before vectorization:
2799 RELATED_STMT VEC_STMT
2800 S1: x = memref - -
2801 S2: z = x + 1 - -
2802
2803 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2804 there):
2805 RELATED_STMT VEC_STMT
2806 VS1_0: vx0 = memref0 VS1_1 -
2807 VS1_1: vx1 = memref1 VS1_2 -
2808 VS1_2: vx2 = memref2 VS1_3 -
2809 VS1_3: vx3 = memref3 - -
2810 S1: x = load - VS1_0
2811 S2: z = x + 1 - -
2812
2813 step2: vectorize stmt S2 (done here):
2814 To vectorize stmt S2 we first need to find the relevant vector
ff802fa1 2815 def for the first operand 'x'. This is, as usual, obtained from
ebfd146a 2816 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
ff802fa1
IR
2817 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2818 relevant vector def 'vx0'. Having found 'vx0' we can generate
ebfd146a
IR
2819 the vector stmt VS2_0, and as usual, record it in the
2820 STMT_VINFO_VEC_STMT of stmt S2.
2821 When creating the second copy (VS2_1), we obtain the relevant vector
2822 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
ff802fa1
IR
2823 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2824 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
ebfd146a 2825 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
ff802fa1 2826 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
ebfd146a
IR
2827 chain of stmts and pointers:
2828 RELATED_STMT VEC_STMT
2829 VS1_0: vx0 = memref0 VS1_1 -
2830 VS1_1: vx1 = memref1 VS1_2 -
2831 VS1_2: vx2 = memref2 VS1_3 -
2832 VS1_3: vx3 = memref3 - -
2833 S1: x = load - VS1_0
2834 VS2_0: vz0 = vx0 + v1 VS2_1 -
2835 VS2_1: vz1 = vx1 + v1 VS2_2 -
2836 VS2_2: vz2 = vx2 + v1 VS2_3 -
2837 VS2_3: vz3 = vx3 + v1 - -
2838 S2: z = x + 1 - VS2_0 */
2839
2840 prev_stmt_info = NULL;
2841 for (j = 0; j < ncopies; j++)
2842 {
2843 /* Handle uses. */
2844 if (j == 0)
2845 {
16949072 2846 if (op_type == binary_op || op_type == ternary_op)
b8698a0f 2847 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
ebfd146a
IR
2848 slp_node);
2849 else
b8698a0f 2850 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
ebfd146a 2851 slp_node);
16949072
RG
2852 if (op_type == ternary_op)
2853 {
2854 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2855 VEC_quick_push (tree, vec_oprnds2,
2856 vect_get_vec_def_for_operand (op2, stmt, NULL));
2857 }
ebfd146a
IR
2858 }
2859 else
16949072
RG
2860 {
2861 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2862 if (op_type == ternary_op)
2863 {
2864 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2865 VEC_quick_push (tree, vec_oprnds2,
2866 vect_get_vec_def_for_stmt_copy (dt[2],
2867 vec_oprnd));
2868 }
2869 }
ebfd146a 2870
9dc3f7de 2871 /* Arguments are ready. Create the new vector stmt. */
ac47786e 2872 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
ebfd146a 2873 {
16949072
RG
2874 vop1 = ((op_type == binary_op || op_type == ternary_op)
2875 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2876 vop2 = ((op_type == ternary_op)
2877 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2878 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2879 vop0, vop1, vop2);
ebfd146a
IR
2880 new_temp = make_ssa_name (vec_dest, new_stmt);
2881 gimple_assign_set_lhs (new_stmt, new_temp);
2882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2883 if (slp_node)
2884 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2885 }
2886
2887 if (slp_node)
2888 continue;
2889
2890 if (j == 0)
2891 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2892 else
2893 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2894 prev_stmt_info = vinfo_for_stmt (new_stmt);
2895 }
2896
2897 VEC_free (tree, heap, vec_oprnds0);
2898 if (vec_oprnds1)
2899 VEC_free (tree, heap, vec_oprnds1);
16949072
RG
2900 if (vec_oprnds2)
2901 VEC_free (tree, heap, vec_oprnds2);
ebfd146a
IR
2902
2903 return true;
2904}
2905
2906
ff802fa1 2907/* Get vectorized definitions for loop-based vectorization. For the first
b8698a0f
L
2908 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2909 scalar operand), and for the rest we get a copy with
ebfd146a
IR
2910 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2911 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2912 The vectors are collected into VEC_OPRNDS. */
2913
2914static void
b8698a0f 2915vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
ebfd146a
IR
2916 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2917{
2918 tree vec_oprnd;
2919
 2920 /* Get the first vector operand. */
 2921 /* All the vector operands except the very first one (that is, the scalar
 2922 OPRND) are stmt copies. */
b8698a0f 2923 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
ebfd146a
IR
2924 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2925 else
2926 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2927
2928 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2929
2930 /* Get second vector operand. */
2931 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2932 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
b8698a0f 2933
ebfd146a
IR
2934 *oprnd = vec_oprnd;
2935
b8698a0f 2936 /* For conversion in multiple steps, continue to get operands
ebfd146a
IR
2937 recursively. */
2938 if (multi_step_cvt)
b8698a0f 2939 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
ebfd146a
IR
2940}
2941
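/* Each invocation pushes two vector defs and recurses MULTI_STEP_CVT more
   times, so a top-level call collects 2 * (MULTI_STEP_CVT + 1) defs;
   vectorizable_type_demotion passes vect_pow2 (multi_step_cvt) - 1 here,
   so that a chain of M demotion steps receives the 2**(M+1) input vectors
   it needs.  */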
2942
2943/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
b8698a0f 2944 For multi-step conversions store the resulting vectors and call the function
ebfd146a
IR
2945 recursively. */
2946
2947static void
2948vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2949 int multi_step_cvt, gimple stmt,
2950 VEC (tree, heap) *vec_dsts,
2951 gimple_stmt_iterator *gsi,
2952 slp_tree slp_node, enum tree_code code,
2953 stmt_vec_info *prev_stmt_info)
2954{
2955 unsigned int i;
2956 tree vop0, vop1, new_tmp, vec_dest;
2957 gimple new_stmt;
2958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2959
b8698a0f 2960 vec_dest = VEC_pop (tree, vec_dsts);
ebfd146a
IR
2961
2962 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2963 {
2964 /* Create demotion operation. */
2965 vop0 = VEC_index (tree, *vec_oprnds, i);
2966 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2967 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2968 new_tmp = make_ssa_name (vec_dest, new_stmt);
2969 gimple_assign_set_lhs (new_stmt, new_tmp);
2970 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2971
2972 if (multi_step_cvt)
2973 /* Store the resulting vector for next recursive call. */
b8698a0f 2974 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
ebfd146a
IR
2975 else
2976 {
b8698a0f 2977 /* This is the last step of the conversion sequence. Store the
ebfd146a
IR
2978 vectors in SLP_NODE or in vector info of the scalar statement
2979 (or in STMT_VINFO_RELATED_STMT chain). */
2980 if (slp_node)
2981 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2982 else
2983 {
2984 if (!*prev_stmt_info)
2985 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2986 else
2987 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2988
2989 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2990 }
2991 }
2992 }
2993
2994 /* For multi-step demotion operations we first generate demotion operations
b8698a0f 2995 from the source type to the intermediate types, and then combine the
ebfd146a
IR
 2996 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2997 type. */
2998 if (multi_step_cvt)
2999 {
 3000 /* At each level of recursion we have half of the operands we had at the
3001 previous level. */
3002 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
b8698a0f 3003 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
ebfd146a
IR
3004 stmt, vec_dsts, gsi, slp_node,
3005 code, prev_stmt_info);
3006 }
3007}
3008
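/* For example, a two-step demotion from int to char with 128-bit vectors
   pairs four V4SI operands into two V8HI vectors in the first step, and
   those into one V16QI vector in the second:

     vect_h0 = VEC_PACK_TRUNC_EXPR <vect_i0, vect_i1>;
     vect_h1 = VEC_PACK_TRUNC_EXPR <vect_i2, vect_i3>;
     vect_c0 = VEC_PACK_TRUNC_EXPR <vect_h0, vect_h1>;

   (the actual code, here assumed to be VEC_PACK_TRUNC_EXPR, comes from
   supportable_narrowing_operation).  */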
3009
3010/* Function vectorizable_type_demotion
3011
3012 Check if STMT performs a binary or unary operation that involves
3013 type demotion, and if it can be vectorized.
3014 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3015 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3016 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3017
3018static bool
3019vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
3020 gimple *vec_stmt, slp_tree slp_node)
3021{
3022 tree vec_dest;
3023 tree scalar_dest;
3024 tree op0;
3025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3026 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3027 enum tree_code code, code1 = ERROR_MARK;
3028 tree def;
3029 gimple def_stmt;
3030 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3031 stmt_vec_info prev_stmt_info;
3032 int nunits_in;
3033 int nunits_out;
3034 tree vectype_out;
3035 int ncopies;
3036 int j, i;
3037 tree vectype_in;
3038 int multi_step_cvt = 0;
3039 VEC (tree, heap) *vec_oprnds0 = NULL;
3040 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
3041 tree last_oprnd, intermediate_type;
3042
a70d6342
IR
3043 /* FORNOW: not supported by basic block SLP vectorization. */
3044 gcc_assert (loop_vinfo);
3045
ebfd146a
IR
3046 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3047 return false;
3048
8644a673 3049 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3050 return false;
3051
3052 /* Is STMT a vectorizable type-demotion operation? */
3053 if (!is_gimple_assign (stmt))
3054 return false;
3055
3056 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3057 return false;
3058
3059 code = gimple_assign_rhs_code (stmt);
3060 if (!CONVERT_EXPR_CODE_P (code))
3061 return false;
3062
b690cc0f
RG
3063 scalar_dest = gimple_assign_lhs (stmt);
3064 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3065
3066 /* Check the operands of the operation. */
ebfd146a 3067 op0 = gimple_assign_rhs1 (stmt);
b690cc0f
RG
3068 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3069 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3070 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3071 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3072 && CONVERT_EXPR_CODE_P (code))))
3073 return false;
3074 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3075 &def_stmt, &def, &dt[0], &vectype_in))
3076 {
3077 if (vect_print_dump_info (REPORT_DETAILS))
3078 fprintf (vect_dump, "use not simple.");
3079 return false;
3080 }
 3081 /* If op0 is an external def, use a vector type with the
3082 same size as the output vector type if possible. */
3083 if (!vectype_in)
3084 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3085 if (vec_stmt)
3086 gcc_assert (vectype_in);
ebfd146a 3087 if (!vectype_in)
7d8930a0
IR
3088 {
3089 if (vect_print_dump_info (REPORT_DETAILS))
3090 {
3091 fprintf (vect_dump, "no vectype for scalar type ");
3092 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3093 }
3094
3095 return false;
3096 }
ebfd146a 3097
b690cc0f 3098 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
ebfd146a
IR
3099 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3100 if (nunits_in >= nunits_out)
3101 return false;
3102
3103 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3104 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 3105 case of SLP. */
437f4a00 3106 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
3107 ncopies = 1;
3108 else
3109 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
ebfd146a
IR
3110 gcc_assert (ncopies >= 1);
3111
ebfd146a 3112 /* Supportable by target? */
b690cc0f
RG
3113 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
3114 &code1, &multi_step_cvt, &interm_types))
ebfd146a
IR
3115 return false;
3116
ebfd146a
IR
3117 if (!vec_stmt) /* transformation not required. */
3118 {
3119 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3120 if (vect_print_dump_info (REPORT_DETAILS))
3121 fprintf (vect_dump, "=== vectorizable_demotion ===");
3122 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3123 return true;
3124 }
3125
3126 /** Transform. **/
3127 if (vect_print_dump_info (REPORT_DETAILS))
3128 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3129 ncopies);
3130
b8698a0f
L
3131 /* In case of multi-step demotion, we first generate demotion operations to
 3132 the intermediate types, and then from those types to the final one.
ebfd146a 3133 We create vector destinations for the intermediate types (TYPES) received
b8698a0f 3134 from supportable_narrowing_operation, and store them in the correct order
ebfd146a
IR
3135 for future use in vect_create_vectorized_demotion_stmts(). */
3136 if (multi_step_cvt)
3137 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3138 else
3139 vec_dsts = VEC_alloc (tree, heap, 1);
b8698a0f 3140
ebfd146a
IR
3141 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3142 VEC_quick_push (tree, vec_dsts, vec_dest);
3143
3144 if (multi_step_cvt)
3145 {
b8698a0f 3146 for (i = VEC_length (tree, interm_types) - 1;
ebfd146a
IR
3147 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3148 {
b8698a0f 3149 vec_dest = vect_create_destination_var (scalar_dest,
ebfd146a
IR
3150 intermediate_type);
3151 VEC_quick_push (tree, vec_dsts, vec_dest);
3152 }
3153 }
3154
3155 /* In case the vectorization factor (VF) is bigger than the number
3156 of elements that we can fit in a vectype (nunits), we have to generate
 3157 more than one vector stmt -- i.e., we need to "unroll" the
3158 vector stmt by a factor VF/nunits. */
3159 last_oprnd = op0;
3160 prev_stmt_info = NULL;
3161 for (j = 0; j < ncopies; j++)
3162 {
3163 /* Handle uses. */
3164 if (slp_node)
9dc3f7de 3165 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
ebfd146a
IR
3166 else
3167 {
3168 VEC_free (tree, heap, vec_oprnds0);
3169 vec_oprnds0 = VEC_alloc (tree, heap,
3170 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
b8698a0f 3171 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
ebfd146a
IR
3172 vect_pow2 (multi_step_cvt) - 1);
3173 }
3174
9dc3f7de 3175 /* Arguments are ready. Create the new vector stmts. */
ebfd146a 3176 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
b8698a0f 3177 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
ebfd146a 3178 multi_step_cvt, stmt, tmp_vec_dsts,
b8698a0f 3179 gsi, slp_node, code1,
ebfd146a
IR
3180 &prev_stmt_info);
3181 }
3182
3183 VEC_free (tree, heap, vec_oprnds0);
3184 VEC_free (tree, heap, vec_dsts);
3185 VEC_free (tree, heap, tmp_vec_dsts);
3186 VEC_free (tree, heap, interm_types);
3187
3188 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3189 return true;
3190}
3191
3192
3193/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
ff802fa1 3194 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
ebfd146a
IR
3195 the resulting vectors and call the function recursively. */
3196
3197static void
3198vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
3199 VEC (tree, heap) **vec_oprnds1,
3200 int multi_step_cvt, gimple stmt,
3201 VEC (tree, heap) *vec_dsts,
3202 gimple_stmt_iterator *gsi,
3203 slp_tree slp_node, enum tree_code code1,
b8698a0f 3204 enum tree_code code2, tree decl1,
ebfd146a
IR
3205 tree decl2, int op_type,
3206 stmt_vec_info *prev_stmt_info)
3207{
3208 int i;
3209 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
3210 gimple new_stmt1, new_stmt2;
3211 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3212 VEC (tree, heap) *vec_tmp;
3213
3214 vec_dest = VEC_pop (tree, vec_dsts);
3215 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
3216
ac47786e 3217 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
ebfd146a
IR
3218 {
3219 if (op_type == binary_op)
3220 vop1 = VEC_index (tree, *vec_oprnds1, i);
3221 else
3222 vop1 = NULL_TREE;
3223
3224 /* Generate the two halves of promotion operation. */
b8698a0f 3225 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
ebfd146a
IR
3226 op_type, vec_dest, gsi, stmt);
3227 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3228 op_type, vec_dest, gsi, stmt);
3229 if (is_gimple_call (new_stmt1))
3230 {
3231 new_tmp1 = gimple_call_lhs (new_stmt1);
3232 new_tmp2 = gimple_call_lhs (new_stmt2);
3233 }
3234 else
3235 {
3236 new_tmp1 = gimple_assign_lhs (new_stmt1);
3237 new_tmp2 = gimple_assign_lhs (new_stmt2);
3238 }
3239
3240 if (multi_step_cvt)
3241 {
3242 /* Store the results for the recursive call. */
3243 VEC_quick_push (tree, vec_tmp, new_tmp1);
3244 VEC_quick_push (tree, vec_tmp, new_tmp2);
3245 }
3246 else
3247 {
 3248 /* Last step of the promotion sequence - store the results. */
3249 if (slp_node)
3250 {
3251 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3252 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3253 }
3254 else
3255 {
3256 if (!*prev_stmt_info)
3257 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3258 else
3259 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3260
3261 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3262 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3263 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3264 }
3265 }
3266 }
3267
3268 if (multi_step_cvt)
3269 {
b8698a0f 3270 /* For a multi-step promotion operation we call the function
ff802fa1 3271 recursively for every stage. We start from the input type,
ebfd146a
IR
 3272 create promotion operations to the intermediate types, and then
 3273 create promotions to the output type. */
3274 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
ebfd146a
IR
3275 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3276 multi_step_cvt - 1, stmt,
3277 vec_dsts, gsi, slp_node, code1,
 3278 code2, decl1, decl2, op_type,
3279 prev_stmt_info);
3280 }
3281
3282 VEC_free (tree, heap, vec_tmp);
ebfd146a 3283}
b8698a0f 3284
3285
3286/* Function vectorizable_type_promotion
3287
3288 Check if STMT performs a binary or unary operation that involves
3289 type promotion, and if it can be vectorized.
3290 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3291 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3292 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3293
3294static bool
3295vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3296 gimple *vec_stmt, slp_tree slp_node)
3297{
3298 tree vec_dest;
3299 tree scalar_dest;
3300 tree op0, op1 = NULL;
3301 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3303 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3304 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3305 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
b8698a0f 3306 int op_type;
3307 tree def;
3308 gimple def_stmt;
3309 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3310 stmt_vec_info prev_stmt_info;
3311 int nunits_in;
3312 int nunits_out;
3313 tree vectype_out;
3314 int ncopies;
3315 int j, i;
3316 tree vectype_in;
3317 tree intermediate_type = NULL_TREE;
3318 int multi_step_cvt = 0;
3319 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3320 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
b8698a0f 3321
3322 /* FORNOW: not supported by basic block SLP vectorization. */
3323 gcc_assert (loop_vinfo);
b8698a0f 3324
3325 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3326 return false;
3327
8644a673 3328 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3329 return false;
3330
3331 /* Is STMT a vectorizable type-promotion operation? */
3332 if (!is_gimple_assign (stmt))
3333 return false;
3334
3335 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3336 return false;
3337
3338 code = gimple_assign_rhs_code (stmt);
3339 if (!CONVERT_EXPR_CODE_P (code)
3340 && code != WIDEN_MULT_EXPR)
3341 return false;
3342
3343 scalar_dest = gimple_assign_lhs (stmt);
3344 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3345
3346 /* Check the operands of the operation. */
ebfd146a 3347 op0 = gimple_assign_rhs1 (stmt);
3348 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3349 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3350 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3351 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3352 && CONVERT_EXPR_CODE_P (code))))
3353 return false;
3354 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3355 &def_stmt, &def, &dt[0], &vectype_in))
3356 {
3357 if (vect_print_dump_info (REPORT_DETAILS))
3358 fprintf (vect_dump, "use not simple.");
3359 return false;
3360 }
3361
3362 op_type = TREE_CODE_LENGTH (code);
3363 if (op_type == binary_op)
3364 {
3365 bool ok;
3366
3367 op1 = gimple_assign_rhs2 (stmt);
3368 if (code == WIDEN_MULT_EXPR)
3369 {
3370 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3371 OP1. */
3372 if (CONSTANT_CLASS_P (op0))
3373 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
3374 &def_stmt, &def, &dt[1], &vectype_in);
3375 else
3376 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
3377 &dt[1]);
3378
3379 if (!ok)
3380 {
3381 if (vect_print_dump_info (REPORT_DETAILS))
3382 fprintf (vect_dump, "use not simple.");
3383 return false;
3384 }
3385 }
3386 }
3387
3388 /* If op0 is an external or constant def use a vector type with
3389 the same size as the output vector type. */
3390 if (!vectype_in)
3391 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
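 /* E.g. (illustrative types only): for a V4SI (16-byte) output and an
 external short operand this yields V8HI, the same-sized vector type
 built over the operand's element type. */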
3392 if (vec_stmt)
3393 gcc_assert (vectype_in);
ebfd146a 3394 if (!vectype_in)
3395 {
3396 if (vect_print_dump_info (REPORT_DETAILS))
3397 {
3398 fprintf (vect_dump, "no vectype for scalar type ");
3399 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3400 }
3401
3402 return false;
3403 }
ebfd146a 3404
b690cc0f 3405 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3406 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3407 if (nunits_in <= nunits_out)
3408 return false;
3409
3410 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3411 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 3412 case of SLP. */
437f4a00 3413 if (slp_node || PURE_SLP_STMT (stmt_info))
3414 ncopies = 1;
3415 else
3416 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3417
3418 gcc_assert (ncopies >= 1);
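 /* Worked example (illustrative numbers): promoting V8HI inputs to V4SI
 results gives nunits_in == 8, so with VF == 8 we get
 ncopies == 8 / 8 == 1; each copy still emits two statements below,
 one per widened half. */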
3419
ebfd146a 3420 /* Supportable by target? */
b690cc0f 3421 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3422 &decl1, &decl2, &code1, &code2,
3423 &multi_step_cvt, &interm_types))
3424 return false;
3425
3426 /* Binary widening operation can only be supported directly by the
3427 architecture. */
3428 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3429
3430 if (!vec_stmt) /* transformation not required. */
3431 {
3432 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3433 if (vect_print_dump_info (REPORT_DETAILS))
3434 fprintf (vect_dump, "=== vectorizable_promotion ===");
3435 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3436 return true;
3437 }
3438
3439 /** Transform. **/
3440
3441 if (vect_print_dump_info (REPORT_DETAILS))
3442 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3443 ncopies);
3444
3445 if (code == WIDEN_MULT_EXPR)
3446 {
3447 if (CONSTANT_CLASS_P (op0))
3448 op0 = fold_convert (TREE_TYPE (op1), op0);
3449 else if (CONSTANT_CLASS_P (op1))
3450 op1 = fold_convert (TREE_TYPE (op0), op1);
3451 }
3452
ebfd146a 3453 /* Handle def. */
b8698a0f 3454 /* In case of multi-step promotion, we first generate promotion operations
ebfd146a 3455 to the intermediate types, and then from those types to the final one.
 3456 We store the vector destinations in VEC_DSTS in the correct order for
 3457 recursive creation of promotion operations in
 3458 vect_create_vectorized_promotion_stmts(). Vector destinations are created
 3459 according to TYPES received from supportable_widening_operation(). */
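 /* E.g. (a hypothetical two-step case): promoting V16QI inputs to V4SI
 results goes QI -> HI -> SI, so VEC_DSTS is pushed as
 { SI-dest, HI-dest } and VEC_pop hands out the HI destination for the
 first stage and the SI destination for the final one. */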
3460 if (multi_step_cvt)
3461 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3462 else
3463 vec_dsts = VEC_alloc (tree, heap, 1);
3464
3465 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3466 VEC_quick_push (tree, vec_dsts, vec_dest);
3467
3468 if (multi_step_cvt)
3469 {
3470 for (i = VEC_length (tree, interm_types) - 1;
3471 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3472 {
3473 vec_dest = vect_create_destination_var (scalar_dest,
3474 intermediate_type);
3475 VEC_quick_push (tree, vec_dsts, vec_dest);
3476 }
3477 }
b8698a0f 3478
3479 if (!slp_node)
3480 {
b8698a0f 3481 vec_oprnds0 = VEC_alloc (tree, heap,
3482 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3483 if (op_type == binary_op)
3484 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3485 }
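 /* Sizing note: every recursion step of the promotion doubles the number
 of intermediate vectors, hence vect_pow2 (multi_step_cvt) slots are
 reserved up front, e.g. 2^2 == 4 for a two-step conversion. */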
3486
3487 /* In case the vectorization factor (VF) is bigger than the number
3488 of elements that we can fit in a vectype (nunits), we have to generate
3489 more than one vector stmt - i.e - we need to "unroll" the
3490 vector stmt by a factor VF/nunits. */
3491
3492 prev_stmt_info = NULL;
3493 for (j = 0; j < ncopies; j++)
3494 {
3495 /* Handle uses. */
3496 if (j == 0)
3497 {
3498 if (slp_node)
3499 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3500 &vec_oprnds1, -1);
3501 else
3502 {
3503 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3504 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3505 if (op_type == binary_op)
3506 {
3507 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3508 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3509 }
3510 }
3511 }
3512 else
3513 {
3514 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3515 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3516 if (op_type == binary_op)
3517 {
3518 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3519 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3520 }
3521 }
3522
9dc3f7de 3523 /* Arguments are ready. Create the new vector stmts. */
3524 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3525 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
b8698a0f 3526 multi_step_cvt, stmt,
3527 tmp_vec_dsts,
3528 gsi, slp_node, code1, code2,
3529 decl1, decl2, op_type,
3530 &prev_stmt_info);
3531 }
3532
3533 VEC_free (tree, heap, vec_dsts);
3534 VEC_free (tree, heap, tmp_vec_dsts);
3535 VEC_free (tree, heap, interm_types);
3536 VEC_free (tree, heap, vec_oprnds0);
3537 VEC_free (tree, heap, vec_oprnds1);
3538
3539 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3540 return true;
3541}
3542
3543
3544/* Function vectorizable_store.
3545
 3546 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3547 can be vectorized.
3548 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3549 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3550 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3551
3552static bool
3553vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3554 slp_tree slp_node)
3555{
3556 tree scalar_dest;
3557 tree data_ref;
3558 tree op;
3559 tree vec_oprnd = NULL_TREE;
3560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3561 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3562 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3563 tree elem_type;
ebfd146a 3564 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3565 struct loop *loop = NULL;
3566 enum machine_mode vec_mode;
3567 tree dummy;
3568 enum dr_alignment_support alignment_support_scheme;
3569 tree def;
3570 gimple def_stmt;
3571 enum vect_def_type dt;
3572 stmt_vec_info prev_stmt_info = NULL;
3573 tree dataref_ptr = NULL_TREE;
3574 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3575 int ncopies;
3576 int j;
3577 gimple next_stmt, first_stmt = NULL;
3578 bool strided_store = false;
272c6793 3579 bool store_lanes_p = false;
3580 unsigned int group_size, i;
3581 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3582 bool inv_p;
3583 VEC(tree,heap) *vec_oprnds = NULL;
3584 bool slp = (slp_node != NULL);
ebfd146a 3585 unsigned int vec_num;
a70d6342 3586 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3587 tree aggr_type;
3588
3589 if (loop_vinfo)
3590 loop = LOOP_VINFO_LOOP (loop_vinfo);
3591
3592 /* Multiple types in SLP are handled by creating the appropriate number of
3593 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3594 case of SLP. */
437f4a00 3595 if (slp || PURE_SLP_STMT (stmt_info))
3596 ncopies = 1;
3597 else
3598 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3599
3600 gcc_assert (ncopies >= 1);
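 /* E.g. (illustrative numbers): with VF == 16 and a V4SI vectype
 (nunits == 4) the scalar store is "unrolled" into ncopies == 4
 vector stores. */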
3601
3602 /* FORNOW. This restriction should be relaxed. */
a70d6342 3603 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3604 {
3605 if (vect_print_dump_info (REPORT_DETAILS))
3606 fprintf (vect_dump, "multiple types in nested loop.");
3607 return false;
3608 }
3609
a70d6342 3610 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3611 return false;
3612
8644a673 3613 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3614 return false;
3615
3616 /* Is vectorizable store? */
3617
3618 if (!is_gimple_assign (stmt))
3619 return false;
3620
3621 scalar_dest = gimple_assign_lhs (stmt);
3622 if (TREE_CODE (scalar_dest) != ARRAY_REF
3623 && TREE_CODE (scalar_dest) != INDIRECT_REF
3624 && TREE_CODE (scalar_dest) != COMPONENT_REF
3625 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3626 && TREE_CODE (scalar_dest) != REALPART_EXPR
3627 && TREE_CODE (scalar_dest) != MEM_REF)
3628 return false;
3629
3630 gcc_assert (gimple_assign_single_p (stmt));
3631 op = gimple_assign_rhs1 (stmt);
a70d6342 3632 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3633 {
3634 if (vect_print_dump_info (REPORT_DETAILS))
3635 fprintf (vect_dump, "use not simple.");
3636 return false;
3637 }
3638
3639 /* The scalar rhs type needs to be trivially convertible to the vector
3640 component type. This should always be the case. */
3641 elem_type = TREE_TYPE (vectype);
3642 if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
b8698a0f 3643 {
3644 if (vect_print_dump_info (REPORT_DETAILS))
3645 fprintf (vect_dump, "??? operands of different types");
3646 return false;
3647 }
3648
3649 vec_mode = TYPE_MODE (vectype);
3650 /* FORNOW. In some cases can vectorize even if data-type not supported
3651 (e.g. - array initialization with 0). */
947131ba 3652 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3653 return false;
3654
3655 if (!STMT_VINFO_DATA_REF (stmt_info))
3656 return false;
3657
3658 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3659 {
3660 if (vect_print_dump_info (REPORT_DETAILS))
3661 fprintf (vect_dump, "negative step for store.");
3662 return false;
3663 }
3664
3665 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3666 {
3667 strided_store = true;
e14c1050 3668 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3669 if (!slp && !PURE_SLP_STMT (stmt_info))
3670 {
e14c1050 3671 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3672 if (vect_store_lanes_supported (vectype, group_size))
3673 store_lanes_p = true;
3674 else if (!vect_strided_store_supported (vectype, group_size))
3675 return false;
3676 }
b8698a0f 3677
3678 if (first_stmt == stmt)
3679 {
3680 /* STMT is the leader of the group. Check the operands of all the
3681 stmts of the group. */
e14c1050 3682 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3683 while (next_stmt)
3684 {
3685 gcc_assert (gimple_assign_single_p (next_stmt));
3686 op = gimple_assign_rhs1 (next_stmt);
b8698a0f 3687 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
a70d6342 3688 &def, &dt))
3689 {
3690 if (vect_print_dump_info (REPORT_DETAILS))
3691 fprintf (vect_dump, "use not simple.");
3692 return false;
3693 }
e14c1050 3694 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3695 }
3696 }
3697 }
3698
3699 if (!vec_stmt) /* transformation not required. */
3700 {
3701 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
272c6793 3702 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3703 return true;
3704 }
3705
3706 /** Transform. **/
3707
3708 if (strided_store)
3709 {
3710 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 3711 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 3712
e14c1050 3713 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3714
3715 /* FORNOW */
a70d6342 3716 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3717
3718 /* We vectorize all the stmts of the interleaving group when we
3719 reach the last stmt in the group. */
3720 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3721 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3722 && !slp)
3723 {
3724 *vec_stmt = NULL;
3725 return true;
3726 }
3727
3728 if (slp)
3729 {
3730 strided_store = false;
3731 /* VEC_NUM is the number of vect stmts to be created for this
3732 group. */
3733 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3734 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3735 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3736 }
ebfd146a 3737 else
3738 /* VEC_NUM is the number of vect stmts to be created for this
3739 group. */
3740 vec_num = group_size;
3741 }
b8698a0f 3742 else
3743 {
3744 first_stmt = stmt;
3745 first_dr = dr;
3746 group_size = vec_num = 1;
ebfd146a 3747 }
b8698a0f 3748
3749 if (vect_print_dump_info (REPORT_DETAILS))
3750 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3751
3752 dr_chain = VEC_alloc (tree, heap, group_size);
3753 oprnds = VEC_alloc (tree, heap, group_size);
3754
720f5239 3755 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 3756 gcc_assert (alignment_support_scheme);
3757 /* Targets with store-lane instructions must not require explicit
3758 realignment. */
3759 gcc_assert (!store_lanes_p
3760 || alignment_support_scheme == dr_aligned
3761 || alignment_support_scheme == dr_unaligned_supported);
3762
3763 if (store_lanes_p)
3764 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3765 else
3766 aggr_type = vectype;
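 /* E.g. (illustrative types): for a group of two V4SI stores handled by
 store-lanes, AGGR_TYPE is the 8-element array type int[8] that a
 vst2-style instruction writes in one operation; otherwise it is simply
 the vector type itself. */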
3767
3768 /* In case the vectorization factor (VF) is bigger than the number
3769 of elements that we can fit in a vectype (nunits), we have to generate
3770 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 3771 vector stmt by a factor VF/nunits. For more details see documentation in
3772 vect_get_vec_def_for_copy_stmt. */
3773
3774 /* In case of interleaving (non-unit strided access):
3775
3776 S1: &base + 2 = x2
3777 S2: &base = x0
3778 S3: &base + 1 = x1
3779 S4: &base + 3 = x3
3780
3781 We create vectorized stores starting from base address (the access of the
3782 first stmt in the chain (S2 in the above example), when the last store stmt
3783 of the chain (S4) is reached:
3784
3785 VS1: &base = vx2
3786 VS2: &base + vec_size*1 = vx0
3787 VS3: &base + vec_size*2 = vx1
3788 VS4: &base + vec_size*3 = vx3
3789
3790 Then permutation statements are generated:
3791
3792 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3793 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3794 ...
b8698a0f 3795
3796 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3797 (the order of the data-refs in the output of vect_permute_store_chain
3798 corresponds to the order of scalar stmts in the interleaving chain - see
3799 the documentation of vect_permute_store_chain()).
3800
3801 In case of both multiple types and interleaving, above vector stores and
ff802fa1 3802 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 3803 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 3804 STMT_VINFO_RELATED_STMT for the next copies.
3805 */
3806
3807 prev_stmt_info = NULL;
3808 for (j = 0; j < ncopies; j++)
3809 {
3810 gimple new_stmt;
3811 gimple ptr_incr;
3812
3813 if (j == 0)
3814 {
3815 if (slp)
3816 {
3817 /* Get vectorized arguments for SLP_NODE. */
3818 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3819 NULL, -1);
3820
3821 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3822 }
3823 else
3824 {
3825 /* For interleaved stores we collect vectorized defs for all the
3826 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3827 used as an input to vect_permute_store_chain(), and OPRNDS as
3828 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3829
3830 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3831 OPRNDS are of size 1. */
b8698a0f 3832 next_stmt = first_stmt;
3833 for (i = 0; i < group_size; i++)
3834 {
3835 /* Since gaps are not supported for interleaved stores,
3836 GROUP_SIZE is the exact number of stmts in the chain.
3837 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3838 there is no interleaving, GROUP_SIZE is 1, and only one
3839 iteration of the loop will be executed. */
3840 gcc_assert (next_stmt
3841 && gimple_assign_single_p (next_stmt));
3842 op = gimple_assign_rhs1 (next_stmt);
3843
b8698a0f 3844 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 3845 NULL);
 3846 VEC_quick_push (tree, dr_chain, vec_oprnd);
 3847 VEC_quick_push (tree, oprnds, vec_oprnd);
e14c1050 3848 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3849 }
3850 }
3851
 3852 /* We should have caught mismatched types earlier. */
3853 gcc_assert (useless_type_conversion_p (vectype,
3854 TREE_TYPE (vec_oprnd)));
272c6793 3855 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3856 NULL_TREE, &dummy, gsi,
3857 &ptr_incr, false, &inv_p);
a70d6342 3858 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 3859 }
b8698a0f 3860 else
ebfd146a 3861 {
3862 /* For interleaved stores we created vectorized defs for all the
3863 defs stored in OPRNDS in the previous iteration (previous copy).
3864 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3865 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3866 next copy.
3867 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3868 OPRNDS are of size 1. */
3869 for (i = 0; i < group_size; i++)
3870 {
3871 op = VEC_index (tree, oprnds, i);
b8698a0f 3872 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 3873 &dt);
b8698a0f 3874 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
 3875 VEC_replace (tree, dr_chain, i, vec_oprnd);
 3876 VEC_replace (tree, oprnds, i, vec_oprnd);
3877 }
3878 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3879 TYPE_SIZE_UNIT (aggr_type));
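 /* The pointer is bumped by the aggregate size on each copy, e.g.
 16 bytes for a single V4SI, or 32 bytes when AGGR_TYPE is the int[8]
 array used by store-lanes (illustrative sizes). */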
3880 }
3881
272c6793 3882 if (store_lanes_p)
ebfd146a 3883 {
272c6793 3884 tree vec_array;
267d3070 3885
3886 /* Combine all the vectors into an array. */
3887 vec_array = create_vector_array (vectype, vec_num);
3888 for (i = 0; i < vec_num; i++)
c2d7ab2a 3889 {
3890 vec_oprnd = VEC_index (tree, dr_chain, i);
3891 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 3892 }
b8698a0f 3893
3894 /* Emit:
3895 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3896 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3897 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3898 gimple_call_set_lhs (new_stmt, data_ref);
3899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3900 mark_symbols_for_renaming (new_stmt);
3901 }
3902 else
3903 {
3904 new_stmt = NULL;
3905 if (strided_store)
3906 {
3907 result_chain = VEC_alloc (tree, heap, group_size);
3908 /* Permute. */
3909 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3910 &result_chain);
3911 }
c2d7ab2a 3912
3913 next_stmt = first_stmt;
3914 for (i = 0; i < vec_num; i++)
3915 {
3916 struct ptr_info_def *pi;
3917
3918 if (i > 0)
3919 /* Bump the vector pointer. */
3920 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3921 stmt, NULL_TREE);
3922
3923 if (slp)
3924 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3925 else if (strided_store)
3926 /* For strided stores vectorized defs are interleaved in
3927 vect_permute_store_chain(). */
3928 vec_oprnd = VEC_index (tree, result_chain, i);
3929
3930 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3931 build_int_cst (reference_alias_ptr_type
3932 (DR_REF (first_dr)), 0));
3933 pi = get_ptr_info (dataref_ptr);
3934 pi->align = TYPE_ALIGN_UNIT (vectype);
3935 if (aligned_access_p (first_dr))
3936 pi->misalign = 0;
3937 else if (DR_MISALIGNMENT (first_dr) == -1)
3938 {
3939 TREE_TYPE (data_ref)
3940 = build_aligned_type (TREE_TYPE (data_ref),
3941 TYPE_ALIGN (elem_type));
3942 pi->align = TYPE_ALIGN_UNIT (elem_type);
3943 pi->misalign = 0;
3944 }
3945 else
3946 {
3947 TREE_TYPE (data_ref)
3948 = build_aligned_type (TREE_TYPE (data_ref),
3949 TYPE_ALIGN (elem_type));
3950 pi->misalign = DR_MISALIGNMENT (first_dr);
3951 }
c2d7ab2a 3952
3953 /* Arguments are ready. Create the new vector stmt. */
3954 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3955 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3956 mark_symbols_for_renaming (new_stmt);
3957
3958 if (slp)
3959 continue;
3960
e14c1050 3961 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3962 if (!next_stmt)
3963 break;
3964 }
ebfd146a 3965 }
3966 if (!slp)
3967 {
3968 if (j == 0)
3969 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3970 else
3971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3972 prev_stmt_info = vinfo_for_stmt (new_stmt);
3973 }
3974 }
3975
3976 VEC_free (tree, heap, dr_chain);
3977 VEC_free (tree, heap, oprnds);
ebfd146a 3978 if (result_chain)
b8698a0f 3979 VEC_free (tree, heap, result_chain);
3980 if (vec_oprnds)
3981 VEC_free (tree, heap, vec_oprnds);
3982
3983 return true;
3984}
3985
3986/* Given a vector type VECTYPE returns a builtin DECL to be used
3987 for vector permutation and stores a mask into *MASK that implements
3988 reversal of the vector elements. If that is impossible to do
3989 returns NULL (and *MASK is unchanged). */
3990
3991static tree
3992perm_mask_for_reverse (tree vectype, tree *mask)
3993{
3994 tree builtin_decl;
3995 tree mask_element_type, mask_type;
3996 tree mask_vec = NULL;
3997 int i;
3998 int nunits;
3999 if (!targetm.vectorize.builtin_vec_perm)
4000 return NULL;
4001
4002 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
4003 &mask_element_type);
4004 if (!builtin_decl || !mask_element_type)
4005 return NULL;
4006
4007 mask_type = get_vectype_for_scalar_type (mask_element_type);
4008 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4009 if (!mask_type
4010 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
4011 return NULL;
4012
4013 for (i = 0; i < nunits; i++)
4014 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
4015 mask_vec = build_vector (mask_type, mask_vec);
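 /* Since tree_cons prepends, the ascending loop above yields the constant
 { nunits-1, ..., 1, 0 } -- e.g. { 3, 2, 1, 0 } for V4SI -- i.e. the
 mask that selects the elements in reverse order. */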
4016
4017 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
4018 return NULL;
4019 if (mask)
4020 *mask = mask_vec;
4021 return builtin_decl;
4022}
4023
4024/* Given a vector variable X that was generated for the scalar LHS of
 4025 STMT, generate instructions to reverse the vector elements of X,
 4026 insert them at *GSI and return the permuted vector variable. */
4027
4028static tree
4029reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
4030{
4031 tree vectype = TREE_TYPE (x);
4032 tree mask_vec, builtin_decl;
4033 tree perm_dest, data_ref;
4034 gimple perm_stmt;
4035
4036 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
4037
4038 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4039
4040 /* Generate the permute statement. */
4041 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
4042 if (!useless_type_conversion_p (vectype,
4043 TREE_TYPE (TREE_TYPE (builtin_decl))))
4044 {
4045 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
4046 tem = make_ssa_name (tem, perm_stmt);
4047 gimple_call_set_lhs (perm_stmt, tem);
4048 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4049 perm_stmt = gimple_build_assign (NULL_TREE,
4050 build1 (VIEW_CONVERT_EXPR,
4051 vectype, tem));
4052 }
a1e53f3f 4053 data_ref = make_ssa_name (perm_dest, perm_stmt);
2a2651b7 4054 gimple_set_lhs (perm_stmt, data_ref);
4055 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4056
4057 return data_ref;
4058}
4059
4060/* vectorizable_load.
4061
 4062 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4063 can be vectorized.
4064 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4065 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4066 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4067
4068static bool
4069vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4070 slp_tree slp_node, slp_instance slp_node_instance)
4071{
4072 tree scalar_dest;
4073 tree vec_dest = NULL;
4074 tree data_ref = NULL;
4075 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4076 stmt_vec_info prev_stmt_info;
ebfd146a 4077 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4078 struct loop *loop = NULL;
ebfd146a 4079 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4080 bool nested_in_vect_loop = false;
4081 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4082 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4083 tree elem_type;
ebfd146a 4084 tree new_temp;
947131ba 4085 enum machine_mode mode;
4086 gimple new_stmt = NULL;
4087 tree dummy;
4088 enum dr_alignment_support alignment_support_scheme;
4089 tree dataref_ptr = NULL_TREE;
4090 gimple ptr_incr;
4091 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4092 int ncopies;
4093 int i, j, group_size;
4094 tree msq = NULL_TREE, lsq;
4095 tree offset = NULL_TREE;
4096 tree realignment_token = NULL_TREE;
4097 gimple phi = NULL;
4098 VEC(tree,heap) *dr_chain = NULL;
4099 bool strided_load = false;
272c6793 4100 bool load_lanes_p = false;
4101 gimple first_stmt;
4102 tree scalar_type;
4103 bool inv_p;
a1e53f3f 4104 bool negative;
4105 bool compute_in_loop = false;
4106 struct loop *at_loop;
4107 int vec_num;
4108 bool slp = (slp_node != NULL);
4109 bool slp_perm = false;
4110 enum tree_code code;
4111 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4112 int vf;
272c6793 4113 tree aggr_type;
4114
4115 if (loop_vinfo)
4116 {
4117 loop = LOOP_VINFO_LOOP (loop_vinfo);
4118 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4119 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4120 }
4121 else
3533e503 4122 vf = 1;
4123
4124 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4125 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4126 case of SLP. */
437f4a00 4127 if (slp || PURE_SLP_STMT (stmt_info))
4128 ncopies = 1;
4129 else
4130 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4131
4132 gcc_assert (ncopies >= 1);
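 /* E.g. (illustrative numbers): VF == 16 with a V4SI vectype gives
 ncopies == 4, matching the four copies VS1_0..VS1_3 in the example
 further down. */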
4133
4134 /* FORNOW. This restriction should be relaxed. */
4135 if (nested_in_vect_loop && ncopies > 1)
4136 {
4137 if (vect_print_dump_info (REPORT_DETAILS))
4138 fprintf (vect_dump, "multiple types in nested loop.");
4139 return false;
4140 }
4141
a70d6342 4142 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4143 return false;
4144
8644a673 4145 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4146 return false;
4147
4148 /* Is vectorizable load? */
4149 if (!is_gimple_assign (stmt))
4150 return false;
4151
4152 scalar_dest = gimple_assign_lhs (stmt);
4153 if (TREE_CODE (scalar_dest) != SSA_NAME)
4154 return false;
4155
4156 code = gimple_assign_rhs_code (stmt);
4157 if (code != ARRAY_REF
4158 && code != INDIRECT_REF
4159 && code != COMPONENT_REF
4160 && code != IMAGPART_EXPR
70f34814 4161 && code != REALPART_EXPR
4162 && code != MEM_REF
4163 && TREE_CODE_CLASS (code) != tcc_declaration)
4164 return false;
4165
4166 if (!STMT_VINFO_DATA_REF (stmt_info))
4167 return false;
4168
4169 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4170 if (negative && ncopies > 1)
4171 {
4172 if (vect_print_dump_info (REPORT_DETAILS))
4173 fprintf (vect_dump, "multiple types with negative step.");
4174 return false;
4175 }
4176
ebfd146a 4177 scalar_type = TREE_TYPE (DR_REF (dr));
947131ba 4178 mode = TYPE_MODE (vectype);
4179
4180 /* FORNOW. In some cases can vectorize even if data-type not supported
4181 (e.g. - data copies). */
947131ba 4182 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4183 {
4184 if (vect_print_dump_info (REPORT_DETAILS))
4185 fprintf (vect_dump, "Aligned load, but unsupported type.");
4186 return false;
4187 }
4188
4189 /* The vector component type needs to be trivially convertible to the
4190 scalar lhs. This should always be the case. */
4191 elem_type = TREE_TYPE (vectype);
4192 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
b8698a0f 4193 {
4194 if (vect_print_dump_info (REPORT_DETAILS))
4195 fprintf (vect_dump, "??? operands of different types");
4196 return false;
4197 }
4198
4199 /* Check if the load is a part of an interleaving chain. */
4200 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4201 {
4202 strided_load = true;
4203 /* FORNOW */
4204 gcc_assert (! nested_in_vect_loop);
4205
e14c1050 4206 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4207 if (!slp && !PURE_SLP_STMT (stmt_info))
4208 {
e14c1050 4209 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4210 if (vect_load_lanes_supported (vectype, group_size))
4211 load_lanes_p = true;
4212 else if (!vect_strided_load_supported (vectype, group_size))
4213 return false;
4214 }
4215 }
4216
4217 if (negative)
4218 {
4219 gcc_assert (!strided_load);
4220 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4221 if (alignment_support_scheme != dr_aligned
4222 && alignment_support_scheme != dr_unaligned_supported)
4223 {
4224 if (vect_print_dump_info (REPORT_DETAILS))
4225 fprintf (vect_dump, "negative step but alignment required.");
4226 return false;
4227 }
4228 if (!perm_mask_for_reverse (vectype, NULL))
4229 {
4230 if (vect_print_dump_info (REPORT_DETAILS))
4231 fprintf (vect_dump, "negative step and reversing not supported.");
4232 return false;
4233 }
4234 }
4235
4236 if (!vec_stmt) /* transformation not required. */
4237 {
4238 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
272c6793 4239 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4240 return true;
4241 }
4242
4243 if (vect_print_dump_info (REPORT_DETAILS))
0ea25ecd 4244 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4245
4246 /** Transform. **/
4247
4248 if (strided_load)
4249 {
e14c1050 4250 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4251 /* Check if the chain of loads is already vectorized. */
4252 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4253 {
4254 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4255 return true;
4256 }
4257 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4258 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4259
4260 /* VEC_NUM is the number of vect stmts to be created for this group. */
4261 if (slp)
4262 {
4263 strided_load = false;
4264 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4265 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4266 slp_perm = true;
4267 }
4268 else
4269 vec_num = group_size;
4270 }
4271 else
4272 {
4273 first_stmt = stmt;
4274 first_dr = dr;
4275 group_size = vec_num = 1;
4276 }
4277
720f5239 4278 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4279 gcc_assert (alignment_support_scheme);
4280 /* Targets with load-lane instructions must not require explicit
4281 realignment. */
4282 gcc_assert (!load_lanes_p
4283 || alignment_support_scheme == dr_aligned
4284 || alignment_support_scheme == dr_unaligned_supported);
4285
4286 /* In case the vectorization factor (VF) is bigger than the number
4287 of elements that we can fit in a vectype (nunits), we have to generate
4288 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 4289 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4290 from one copy of the vector stmt to the next, in the field
ff802fa1 4291 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4292 stages to find the correct vector defs to be used when vectorizing
4293 stmts that use the defs of the current stmt. The example below
4294 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4295 need to create 4 vectorized stmts):
4296
4297 before vectorization:
4298 RELATED_STMT VEC_STMT
4299 S1: x = memref - -
4300 S2: z = x + 1 - -
4301
4302 step 1: vectorize stmt S1:
4303 We first create the vector stmt VS1_0, and, as usual, record a
4304 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4305 Next, we create the vector stmt VS1_1, and record a pointer to
4306 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4307 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4308 stmts and pointers:
4309 RELATED_STMT VEC_STMT
4310 VS1_0: vx0 = memref0 VS1_1 -
4311 VS1_1: vx1 = memref1 VS1_2 -
4312 VS1_2: vx2 = memref2 VS1_3 -
4313 VS1_3: vx3 = memref3 - -
4314 S1: x = load - VS1_0
4315 S2: z = x + 1 - -
4316
 4317 See the documentation of vect_get_vec_def_for_stmt_copy for how the
 4318 information recorded in the RELATED_STMT field is used to vectorize
4319 stmt S2. */
4320
4321 /* In case of interleaving (non-unit strided access):
4322
4323 S1: x2 = &base + 2
4324 S2: x0 = &base
4325 S3: x1 = &base + 1
4326 S4: x3 = &base + 3
4327
b8698a0f 4328 Vectorized loads are created in the order of memory accesses
4329 starting from the access of the first stmt of the chain:
4330
4331 VS1: vx0 = &base
4332 VS2: vx1 = &base + vec_size*1
4333 VS3: vx3 = &base + vec_size*2
4334 VS4: vx4 = &base + vec_size*3
4335
4336 Then permutation statements are generated:
4337
4338 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4339 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4340 ...
4341
4342 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4343 (the order of the data-refs in the output of vect_permute_load_chain
4344 corresponds to the order of scalar stmts in the interleaving chain - see
4345 the documentation of vect_permute_load_chain()).
4346 The generation of permutation stmts and recording them in
4347 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4348
b8698a0f 4349 In case of both multiple types and interleaving, the vector loads and
4350 permutation stmts above are created for every copy. The result vector
4351 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4352 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4353
4354 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4355 on a target that supports unaligned accesses (dr_unaligned_supported)
4356 we generate the following code:
4357 p = initial_addr;
4358 indx = 0;
4359 loop {
4360 p = p + indx * vectype_size;
4361 vec_dest = *(p);
4362 indx = indx + 1;
4363 }
4364
4365 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4366 does not support unaligned accesses (dr_explicit_realign_optimized) -
4367 then generate the following code, in which the data in each iteration is
4368 obtained by two vector loads, one from the previous iteration, and one
4369 from the current iteration:
4370 p1 = initial_addr;
4371 msq_init = *(floor(p1))
4372 p2 = initial_addr + VS - 1;
4373 realignment_token = call target_builtin;
4374 indx = 0;
4375 loop {
4376 p2 = p2 + indx * vectype_size
4377 lsq = *(floor(p2))
4378 vec_dest = realign_load (msq, lsq, realignment_token)
4379 indx = indx + 1;
4380 msq = lsq;
4381 } */
4382
4383 /* If the misalignment remains the same throughout the execution of the
4384 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 4385 preheader. Otherwise, it needs to be created inside the loop.
4386 This can only occur when vectorizing memory accesses in the inner-loop
4387 nested within an outer-loop that is being vectorized. */
4388
a70d6342 4389 if (loop && nested_in_vect_loop_p (loop, stmt)
4390 && (TREE_INT_CST_LOW (DR_STEP (dr))
4391 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4392 {
4393 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4394 compute_in_loop = true;
4395 }
4396
4397 if ((alignment_support_scheme == dr_explicit_realign_optimized
4398 || alignment_support_scheme == dr_explicit_realign)
4399 && !compute_in_loop)
4400 {
4401 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4402 alignment_support_scheme, NULL_TREE,
4403 &at_loop);
4404 if (alignment_support_scheme == dr_explicit_realign_optimized)
4405 {
4406 phi = SSA_NAME_DEF_STMT (msq);
4407 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4408 }
4409 }
4410 else
4411 at_loop = loop;
4412
4413 if (negative)
4414 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4415
4416 if (load_lanes_p)
4417 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4418 else
4419 aggr_type = vectype;
4420
4421 prev_stmt_info = NULL;
4422 for (j = 0; j < ncopies; j++)
b8698a0f 4423 {
272c6793 4424 /* 1. Create the vector or array pointer update chain. */
ebfd146a 4425 if (j == 0)
272c6793 4426 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4427 offset, &dummy, gsi,
4428 &ptr_incr, false, &inv_p);
ebfd146a 4429 else
4430 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4431 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 4432
4433 if (strided_load || slp_perm)
4434 dr_chain = VEC_alloc (tree, heap, vec_num);
4435
272c6793 4436 if (load_lanes_p)
ebfd146a 4437 {
4438 tree vec_array;
4439
4440 vec_array = create_vector_array (vectype, vec_num);
4441
4442 /* Emit:
4443 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4444 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4445 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4446 gimple_call_set_lhs (new_stmt, vec_array);
4447 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4448 mark_symbols_for_renaming (new_stmt);
ebfd146a 4449
4450 /* Extract each vector into an SSA_NAME. */
4451 for (i = 0; i < vec_num; i++)
ebfd146a 4452 {
4453 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4454 vec_array, i);
4455 VEC_quick_push (tree, dr_chain, new_temp);
4456 }
4457
4458 /* Record the mapping between SSA_NAMEs and statements. */
4459 vect_record_strided_load_vectors (stmt, dr_chain);
4460 }
4461 else
4462 {
4463 for (i = 0; i < vec_num; i++)
4464 {
4465 if (i > 0)
4466 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4467 stmt, NULL_TREE);
4468
4469 /* 2. Create the vector-load in the loop. */
4470 switch (alignment_support_scheme)
4471 {
4472 case dr_aligned:
4473 case dr_unaligned_supported:
be1ac4ec 4474 {
4475 struct ptr_info_def *pi;
4476 data_ref
4477 = build2 (MEM_REF, vectype, dataref_ptr,
4478 build_int_cst (reference_alias_ptr_type
4479 (DR_REF (first_dr)), 0));
4480 pi = get_ptr_info (dataref_ptr);
4481 pi->align = TYPE_ALIGN_UNIT (vectype);
4482 if (alignment_support_scheme == dr_aligned)
4483 {
4484 gcc_assert (aligned_access_p (first_dr));
4485 pi->misalign = 0;
4486 }
4487 else if (DR_MISALIGNMENT (first_dr) == -1)
4488 {
4489 TREE_TYPE (data_ref)
4490 = build_aligned_type (TREE_TYPE (data_ref),
4491 TYPE_ALIGN (elem_type));
4492 pi->align = TYPE_ALIGN_UNIT (elem_type);
4493 pi->misalign = 0;
4494 }
4495 else
4496 {
4497 TREE_TYPE (data_ref)
4498 = build_aligned_type (TREE_TYPE (data_ref),
4499 TYPE_ALIGN (elem_type));
4500 pi->misalign = DR_MISALIGNMENT (first_dr);
4501 }
4502 break;
be1ac4ec 4503 }
272c6793 4504 case dr_explicit_realign:
267d3070 4505 {
4506 tree ptr, bump;
4507 tree vs_minus_1;
4508
4509 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4510
4511 if (compute_in_loop)
4512 msq = vect_setup_realignment (first_stmt, gsi,
4513 &realignment_token,
4514 dr_explicit_realign,
4515 dataref_ptr, NULL);
4516
4517 new_stmt = gimple_build_assign_with_ops
4518 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4519 build_int_cst
4520 (TREE_TYPE (dataref_ptr),
4521 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4522 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4523 gimple_assign_set_lhs (new_stmt, ptr);
4524 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4525 data_ref
4526 = build2 (MEM_REF, vectype, ptr,
4527 build_int_cst (reference_alias_ptr_type
4528 (DR_REF (first_dr)), 0));
4529 vec_dest = vect_create_destination_var (scalar_dest,
4530 vectype);
4531 new_stmt = gimple_build_assign (vec_dest, data_ref);
4532 new_temp = make_ssa_name (vec_dest, new_stmt);
4533 gimple_assign_set_lhs (new_stmt, new_temp);
4534 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4535 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4537 msq = new_temp;
4538
4539 bump = size_binop (MULT_EXPR, vs_minus_1,
4540 TYPE_SIZE_UNIT (scalar_type));
4541 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4542 new_stmt = gimple_build_assign_with_ops
4543 (BIT_AND_EXPR, NULL_TREE, ptr,
4544 build_int_cst
4545 (TREE_TYPE (ptr),
4546 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4547 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4548 gimple_assign_set_lhs (new_stmt, ptr);
4549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4550 data_ref
4551 = build2 (MEM_REF, vectype, ptr,
4552 build_int_cst (reference_alias_ptr_type
4553 (DR_REF (first_dr)), 0));
4554 break;
267d3070 4555 }
4556 case dr_explicit_realign_optimized:
4557 new_stmt = gimple_build_assign_with_ops
4558 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4559 build_int_cst
4560 (TREE_TYPE (dataref_ptr),
4561 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4562 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4563 new_stmt);
4564 gimple_assign_set_lhs (new_stmt, new_temp);
4565 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4566 data_ref
4567 = build2 (MEM_REF, vectype, new_temp,
4568 build_int_cst (reference_alias_ptr_type
4569 (DR_REF (first_dr)), 0));
4570 break;
4571 default:
4572 gcc_unreachable ();
4573 }
ebfd146a 4574 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 4575 new_stmt = gimple_build_assign (vec_dest, data_ref);
4576 new_temp = make_ssa_name (vec_dest, new_stmt);
4577 gimple_assign_set_lhs (new_stmt, new_temp);
4578 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793 4579 mark_symbols_for_renaming (new_stmt);
ebfd146a 4580
4581 /* 3. Handle explicit realignment if necessary/supported.
4582 Create in loop:
4583 vec_dest = realign_load (msq, lsq, realignment_token) */
4584 if (alignment_support_scheme == dr_explicit_realign_optimized
4585 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 4586 {
4587 lsq = gimple_assign_lhs (new_stmt);
4588 if (!realignment_token)
4589 realignment_token = dataref_ptr;
4590 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4591 new_stmt
4592 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4593 vec_dest, msq, lsq,
4594 realignment_token);
4595 new_temp = make_ssa_name (vec_dest, new_stmt);
4596 gimple_assign_set_lhs (new_stmt, new_temp);
4597 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4598
4599 if (alignment_support_scheme == dr_explicit_realign_optimized)
4600 {
4601 gcc_assert (phi);
4602 if (i == vec_num - 1 && j == ncopies - 1)
4603 add_phi_arg (phi, lsq,
4604 loop_latch_edge (containing_loop),
4605 UNKNOWN_LOCATION);
4606 msq = lsq;
4607 }
ebfd146a 4608 }
ebfd146a 4609
4610 /* 4. Handle invariant-load. */
4611 if (inv_p && !bb_vinfo)
ebfd146a 4612 {
4613 tree vec_inv;
4614 gimple_stmt_iterator gsi2 = *gsi;
272c6793 4615 gcc_assert (!strided_load);
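 /* An invariant load is vectorized once as a splat: e.g. a scalar
 x = c becomes vx = { c, c, c, c } for V4SI (illustrative value). */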
4616 gsi_next (&gsi2);
4617 vec_inv = build_vector_from_val (vectype, scalar_dest);
4618 new_temp = vect_init_vector (stmt, vec_inv,
4619 vectype, &gsi2);
4620 new_stmt = SSA_NAME_DEF_STMT (new_temp);
272c6793 4621 }
ebfd146a 4622
4623 if (negative)
4624 {
4625 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4626 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4627 }
267d3070 4628
4629 /* Collect vector loads and later create their permutation in
4630 vect_transform_strided_load (). */
4631 if (strided_load || slp_perm)
4632 VEC_quick_push (tree, dr_chain, new_temp);
267d3070 4633
4634 /* Store vector loads in the corresponding SLP_NODE. */
4635 if (slp && !slp_perm)
4636 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4637 new_stmt);
4638 }
4639 }
4640
4641 if (slp && !slp_perm)
4642 continue;
4643
4644 if (slp_perm)
4645 {
a70d6342 4646 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4647 slp_node_instance, false))
4648 {
4649 VEC_free (tree, heap, dr_chain);
4650 return false;
4651 }
4652 }
4653 else
4654 {
4655 if (strided_load)
4656 {
4657 if (!load_lanes_p)
4658 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
ebfd146a 4659 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4660 }
4661 else
4662 {
4663 if (j == 0)
4664 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4665 else
4666 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4667 prev_stmt_info = vinfo_for_stmt (new_stmt);
4668 }
4669 }
4670 if (dr_chain)
4671 VEC_free (tree, heap, dr_chain);
4672 }
4673
4674 return true;
4675}
4676
4677/* Function vect_is_simple_cond.
b8698a0f 4678
4679 Input:
4680 LOOP - the loop that is being vectorized.
4681 COND - Condition that is checked for simple use.
4682
 4683 Returns whether a COND can be vectorized. Checks whether
 4684 condition operands are supportable using vect_is_simple_use. */
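/* E.g. (illustrative operands): a condition like a_5 < 42 qualifies
 when a_5 has a simple (vectorizable) definition and 42 is an
 INTEGER_CST; with two SSA names both sides are checked. */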
4685
4686static bool
4687vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4688{
4689 tree lhs, rhs;
4690 tree def;
4691 enum vect_def_type dt;
4692
4693 if (!COMPARISON_CLASS_P (cond))
4694 return false;
4695
4696 lhs = TREE_OPERAND (cond, 0);
4697 rhs = TREE_OPERAND (cond, 1);
4698
4699 if (TREE_CODE (lhs) == SSA_NAME)
4700 {
4701 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
b8698a0f 4702 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
a70d6342 4703 &dt))
4704 return false;
4705 }
4706 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4707 && TREE_CODE (lhs) != FIXED_CST)
4708 return false;
4709
4710 if (TREE_CODE (rhs) == SSA_NAME)
4711 {
4712 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
b8698a0f 4713 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
a70d6342 4714 &dt))
4715 return false;
4716 }
4717 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4718 && TREE_CODE (rhs) != FIXED_CST)
4719 return false;
4720
4721 return true;
4722}
4723
4724/* vectorizable_condition.
4725
 4726 Check if STMT is a conditional modify expression that can be vectorized.
4727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4728 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4729 at GSI.
4730
 4731 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector
 4732 variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX
 4733 is 1, and in the else clause if it is 2).
ebfd146a
IR
4734
4735 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4736
4bbe8262 4737bool
ebfd146a 4738vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4bbe8262 4739 gimple *vec_stmt, tree reduc_def, int reduc_index)
4740{
4741 tree scalar_dest = NULL_TREE;
4742 tree vec_dest = NULL_TREE;
4743 tree cond_expr, then_clause, else_clause;
4744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4745 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4746 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4747 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4748 tree vec_compare, vec_cond_expr;
4749 tree new_temp;
4750 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4751 enum machine_mode vec_mode;
4752 tree def;
a855b1b1 4753 enum vect_def_type dt, dts[4];
4754 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4755 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4756 enum tree_code code;
4757 stmt_vec_info prev_stmt_info = NULL;
4758 int j;
ebfd146a 4759
4760 /* FORNOW: unsupported in basic block SLP. */
4761 gcc_assert (loop_vinfo);
b8698a0f 4762
4763 /* FORNOW: SLP not supported. */
4764 if (STMT_SLP_TYPE (stmt_info))
4765 return false;
4766
ebfd146a 4767 gcc_assert (ncopies >= 1);
a855b1b1 4768 if (reduc_index && ncopies > 1)
4769 return false; /* FORNOW */
4770
4771 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4772 return false;
4773
4774 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4775 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4776 && reduc_def))
4777 return false;
4778
ebfd146a 4779 /* FORNOW: not yet supported. */
b8698a0f 4780 if (STMT_VINFO_LIVE_P (stmt_info))
4781 {
4782 if (vect_print_dump_info (REPORT_DETAILS))
4783 fprintf (vect_dump, "value used after loop.");
4784 return false;
4785 }
4786
4787 /* Is vectorizable conditional operation? */
4788 if (!is_gimple_assign (stmt))
4789 return false;
4790
4791 code = gimple_assign_rhs_code (stmt);
4792
4793 if (code != COND_EXPR)
4794 return false;
4795
4796 cond_expr = gimple_assign_rhs1 (stmt);
4797 then_clause = gimple_assign_rhs2 (stmt);
4798 else_clause = gimple_assign_rhs3 (stmt);
4799
4800 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4801 return false;
4802
4803 /* We do not handle two different vector types for the condition
4804 and the values. */
4805 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
4806 TREE_TYPE (vectype)))
4807 return false;
4808
4809 if (TREE_CODE (then_clause) == SSA_NAME)
4810 {
4811 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
b8698a0f 4812 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4813 &then_def_stmt, &def, &dt))
4814 return false;
4815 }
b8698a0f 4816 else if (TREE_CODE (then_clause) != INTEGER_CST
4817 && TREE_CODE (then_clause) != REAL_CST
4818 && TREE_CODE (then_clause) != FIXED_CST)
4819 return false;
4820
4821 if (TREE_CODE (else_clause) == SSA_NAME)
4822 {
4823 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
a70d6342 4824 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4825 &else_def_stmt, &def, &dt))
4826 return false;
4827 }
b8698a0f 4828 else if (TREE_CODE (else_clause) != INTEGER_CST
4829 && TREE_CODE (else_clause) != REAL_CST
4830 && TREE_CODE (else_clause) != FIXED_CST)
4831 return false;
4832
4833
4834 vec_mode = TYPE_MODE (vectype);
4835
b8698a0f 4836 if (!vec_stmt)
4837 {
4838 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4839 return expand_vec_cond_expr_p (TREE_TYPE (gimple_assign_lhs (stmt)),
4840 vec_mode);
4841 }
4842
4843 /* Transform */
4844
4845 /* Handle def. */
4846 scalar_dest = gimple_assign_lhs (stmt);
4847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4848
4849 /* Handle cond expr. */
4850 for (j = 0; j < ncopies; j++)
4851 {
4852 gimple new_stmt;
4853 if (j == 0)
4854 {
4855 gimple gtemp;
4856 vec_cond_lhs =
4857 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4858 stmt, NULL);
4859 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4860 NULL, &gtemp, &def, &dts[0]);
4861 vec_cond_rhs =
4862 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4863 stmt, NULL);
4864 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4865 NULL, &gtemp, &def, &dts[1]);
4866 if (reduc_index == 1)
4867 vec_then_clause = reduc_def;
4868 else
4869 {
4870 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4871 stmt, NULL);
4872 vect_is_simple_use (then_clause, loop_vinfo,
4873 NULL, &gtemp, &def, &dts[2]);
4874 }
4875 if (reduc_index == 2)
4876 vec_else_clause = reduc_def;
4877 else
4878 {
4879 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4880 stmt, NULL);
4881 vect_is_simple_use (else_clause, loop_vinfo,
4882 NULL, &gtemp, &def, &dts[3]);
4883 }
4884 }
4885 else
4886 {
4887 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4888 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4889 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4890 vec_then_clause);
4891 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4892 vec_else_clause);
4893 }
4894
9dc3f7de 4895 /* Arguments are ready. Create the new vector stmt. */
4896 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4897 vec_cond_lhs, vec_cond_rhs);
4898 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4899 vec_compare, vec_then_clause, vec_else_clause);
4900
4901 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4902 new_temp = make_ssa_name (vec_dest, new_stmt);
4903 gimple_assign_set_lhs (new_stmt, new_temp);
4904 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4905 if (j == 0)
4906 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4907 else
4908 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4909
4910 prev_stmt_info = vinfo_for_stmt (new_stmt);
4911 }
4912
4913 return true;
4914}
4915
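/* For illustration, a sketch of the transform performed above on one
   vector copy (the SSA names are made up):

     scalar:  a_1 = x_2 < y_3 ? p_4 : q_5;

   becomes

     vector:  va_1 = VEC_COND_EXPR <vx_2 < vy_3, vp_4, vq_5>;

   i.e. the comparison is rebuilt with the same tree code on the
   vectorized condition operands, and the VEC_COND_EXPR selects between
   the vectorized then/else values element-wise.  */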
4916
4917/* Make sure the statement is vectorizable. */
4918
4919bool
4920vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4921{
4922 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4923 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4924 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4925 bool ok;
4926 tree scalar_type, vectype;
4927 gimple pattern_stmt, pattern_def_stmt;
4928
4929 if (vect_print_dump_info (REPORT_DETAILS))
4930 {
4931 fprintf (vect_dump, "==> examining statement: ");
4932 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4933 }
4934
4935 if (gimple_has_volatile_ops (stmt))
4936 {
4937 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4938 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4939
4940 return false;
4941 }
4942
4943 /* Skip stmts that do not need to be vectorized. In loops this is expected
4944 to include:
4945 - the COND_EXPR which is the loop exit condition
4946 - any LABEL_EXPRs in the loop
4947 - computations that are used only for array indexing or loop control.
4948 In basic blocks we only analyze statements that are a part of some SLP
4949 instance, therefore, all the statements are relevant.
4950
4951 A pattern statement needs to be analyzed instead of the original statement
4952 if the original statement is not relevant. Otherwise, we analyze both
4953 statements. */
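  /* For example, in

       for (i = 0; i < n; i++)
         a[i] = b[i] + x;

     the exit test i < n and the increment of i are not relevant by
     themselves, while the load, the addition and the store are.  */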
4954
4955 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4956 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4957 && !STMT_VINFO_LIVE_P (stmt_info))
4958 {
4959 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4960 && pattern_stmt
4961 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4962 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4963 {
4964 /* Analyze PATTERN_STMT instead of the original stmt. */
4965 stmt = pattern_stmt;
4966 stmt_info = vinfo_for_stmt (pattern_stmt);
4967 if (vect_print_dump_info (REPORT_DETAILS))
4968 {
4969 fprintf (vect_dump, "==> examining pattern statement: ");
4970 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4971 }
4972 }
4973 else
4974 {
4975 if (vect_print_dump_info (REPORT_DETAILS))
4976 fprintf (vect_dump, "irrelevant.");
4977
4978 return true;
4979 }
4980 }
4981 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
4982 && pattern_stmt
4983 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
4984 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
4985 {
4986 /* Analyze PATTERN_STMT too. */
4987 if (vect_print_dump_info (REPORT_DETAILS))
4988 {
4989 fprintf (vect_dump, "==> examining pattern statement: ");
4990 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4991 }
4992
4993 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
4994 return false;
4995 }
4996
4997 if (is_pattern_stmt_p (stmt_info)
4998 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
4999 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5000 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5001 {
5002 /* Analyze def stmt of STMT if it's a pattern stmt. */
5003 if (vect_print_dump_info (REPORT_DETAILS))
5004 {
5005 fprintf (vect_dump, "==> examining pattern def statement: ");
5006 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5007 }
5008
5009 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5010 return false;
5011 }
5012
5013
5014 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5015 {
5016 case vect_internal_def:
5017 break;
5018
5019 case vect_reduction_def:
5020 case vect_nested_cycle:
5021 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5022 || relevance == vect_used_in_outer_by_reduction
5023 || relevance == vect_unused_in_scope));
5024 break;
5025
5026 case vect_induction_def:
5027 case vect_constant_def:
5028 case vect_external_def:
5029 case vect_unknown_def_type:
5030 default:
5031 gcc_unreachable ();
5032 }
5033
5034 if (bb_vinfo)
5035 {
5036 gcc_assert (PURE_SLP_STMT (stmt_info));
5037
5038 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5039 if (vect_print_dump_info (REPORT_DETAILS))
5040 {
5041 fprintf (vect_dump, "get vectype for scalar type: ");
5042 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5043 }
5044
5045 vectype = get_vectype_for_scalar_type (scalar_type);
5046 if (!vectype)
5047 {
5048 if (vect_print_dump_info (REPORT_DETAILS))
5049 {
5050 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5051 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5052 }
5053 return false;
5054 }
5055
5056 if (vect_print_dump_info (REPORT_DETAILS))
5057 {
5058 fprintf (vect_dump, "vectype: ");
5059 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5060 }
5061
5062 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5063 }
5064
5065 if (STMT_VINFO_RELEVANT_P (stmt_info))
5066 {
5067 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5068 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5069 *need_to_vectorize = true;
5070 }
5071
5072 ok = true;
5073 if (!bb_vinfo
5074 && (STMT_VINFO_RELEVANT_P (stmt_info)
5075 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5076 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
5077 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
5078 || vectorizable_conversion (stmt, NULL, NULL, NULL)
5079 || vectorizable_shift (stmt, NULL, NULL, NULL)
5080 || vectorizable_operation (stmt, NULL, NULL, NULL)
5081 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5082 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5083 || vectorizable_call (stmt, NULL, NULL)
5084 || vectorizable_store (stmt, NULL, NULL, NULL)
5085 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5086 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
5087 else
5088 {
5089 if (bb_vinfo)
5090 ok = (vectorizable_shift (stmt, NULL, NULL, node)
5091 || vectorizable_operation (stmt, NULL, NULL, node)
5092 || vectorizable_assignment (stmt, NULL, NULL, node)
5093 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5094 || vectorizable_store (stmt, NULL, NULL, node));
5095 }
5096
5097 if (!ok)
5098 {
5099 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5100 {
5101 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5102 fprintf (vect_dump, "supported: ");
5103 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5104 }
5105
5106 return false;
5107 }
5108
5109 if (bb_vinfo)
5110 return true;
5111
5112 /* Stmts that are (also) "live" (i.e., used outside of the loop)
5113 need extra handling, except for vectorizable reductions. */
5114 if (STMT_VINFO_LIVE_P (stmt_info)
5115 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5116 ok = vectorizable_live_operation (stmt, NULL, NULL);
5117
5118 if (!ok)
5119 {
5120 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5121 {
5122 fprintf (vect_dump, "not vectorized: live stmt not ");
5123 fprintf (vect_dump, "supported: ");
5124 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5125 }
5126
5127 return false;
5128 }
5129
5130 return true;
5131}
5132
5133
5134/* Function vect_transform_stmt.
5135
5136 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5137
5138bool
5139vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5140 bool *strided_store, slp_tree slp_node,
5141 slp_instance slp_node_instance)
5142{
5143 bool is_store = false;
5144 gimple vec_stmt = NULL;
5145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5146 bool done;
5147
5148 switch (STMT_VINFO_TYPE (stmt_info))
5149 {
5150 case type_demotion_vec_info_type:
5151 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
5152 gcc_assert (done);
5153 break;
5154
5155 case type_promotion_vec_info_type:
5156 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
5157 gcc_assert (done);
5158 break;
5159
5160 case type_conversion_vec_info_type:
5161 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5162 gcc_assert (done);
5163 break;
5164
5165 case induc_vec_info_type:
5166 gcc_assert (!slp_node);
5167 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5168 gcc_assert (done);
5169 break;
5170
5171 case shift_vec_info_type:
5172 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5173 gcc_assert (done);
5174 break;
5175
5176 case op_vec_info_type:
5177 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5178 gcc_assert (done);
5179 break;
5180
5181 case assignment_vec_info_type:
5182 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5183 gcc_assert (done);
5184 break;
5185
5186 case load_vec_info_type:
5187 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5188 slp_node_instance);
5189 gcc_assert (done);
5190 break;
5191
5192 case store_vec_info_type:
5193 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5194 gcc_assert (done);
5195 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5196 {
5197 /* In case of interleaving, the whole chain is vectorized when the
5198 last store in the chain is reached. Store stmts before the last
5199 one are skipped, and their vec_stmt_info shouldn't be freed
5200 meanwhile. */
5201 *strided_store = true;
5202 if (STMT_VINFO_VEC_STMT (stmt_info))
5203 is_store = true;
5204 }
5205 else
5206 is_store = true;
5207 break;
5208
5209 case condition_vec_info_type:
5210 gcc_assert (!slp_node);
5211 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
5212 gcc_assert (done);
5213 break;
5214
5215 case call_vec_info_type:
5216 gcc_assert (!slp_node);
5217 done = vectorizable_call (stmt, gsi, &vec_stmt);
5218 stmt = gsi_stmt (*gsi);
5219 break;
5220
5221 case reduc_vec_info_type:
5222 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5223 gcc_assert (done);
5224 break;
5225
5226 default:
5227 if (!STMT_VINFO_LIVE_P (stmt_info))
5228 {
5229 if (vect_print_dump_info (REPORT_DETAILS))
5230 fprintf (vect_dump, "stmt not supported.");
5231 gcc_unreachable ();
5232 }
5233 }
5234
5235 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5236 is being vectorized, but outside the immediately enclosing loop. */
5237 if (vec_stmt
5238 && STMT_VINFO_LOOP_VINFO (stmt_info)
5239 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5240 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5241 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5242 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5243 || STMT_VINFO_RELEVANT (stmt_info) ==
5244 vect_used_in_outer_by_reduction))
5245 {
5246 struct loop *innerloop = LOOP_VINFO_LOOP (
5247 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5248 imm_use_iterator imm_iter;
5249 use_operand_p use_p;
5250 tree scalar_dest;
5251 gimple exit_phi;
5252
5253 if (vect_print_dump_info (REPORT_DETAILS))
5254 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5255
5256 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5257 (to be used when vectorizing outer-loop stmts that use the DEF of
5258 STMT). */
5259 if (gimple_code (stmt) == GIMPLE_PHI)
5260 scalar_dest = PHI_RESULT (stmt);
5261 else
5262 scalar_dest = gimple_assign_lhs (stmt);
5263
5264 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5265 {
5266 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5267 {
5268 exit_phi = USE_STMT (use_p);
5269 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5270 }
5271 }
5272 }
5273
5274 /* Handle stmts whose DEF is used outside the loop-nest that is
5275 being vectorized. */
5276 if (STMT_VINFO_LIVE_P (stmt_info)
5277 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5278 {
5279 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5280 gcc_assert (done);
5281 }
5282
5283 if (vec_stmt)
5284 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5285
5286 return is_store;
5287}
5288
5289
5290/* Remove a group of stores (for SLP or interleaving), free their
5291 stmt_vec_info. */
5292
5293void
5294vect_remove_stores (gimple first_stmt)
5295{
5296 gimple next = first_stmt;
5297 gimple tmp;
5298 gimple_stmt_iterator next_si;
5299
5300 while (next)
5301 {
5302 /* Free the attached stmt_vec_info and remove the stmt. */
5303 next_si = gsi_for_stmt (next);
5304 gsi_remove (&next_si, true);
5305 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
5306 free_stmt_vec_info (next);
5307 next = tmp;
5308 }
5309}
5310
5311
5312/* Function new_stmt_vec_info.
5313
5314 Create and initialize a new stmt_vec_info struct for STMT. */
5315
5316stmt_vec_info
5317new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5318 bb_vec_info bb_vinfo)
5319{
5320 stmt_vec_info res;
5321 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5322
5323 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5324 STMT_VINFO_STMT (res) = stmt;
5325 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5326 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5327 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5328 STMT_VINFO_LIVE_P (res) = false;
5329 STMT_VINFO_VECTYPE (res) = NULL;
5330 STMT_VINFO_VEC_STMT (res) = NULL;
5331 STMT_VINFO_VECTORIZABLE (res) = true;
5332 STMT_VINFO_IN_PATTERN_P (res) = false;
5333 STMT_VINFO_RELATED_STMT (res) = NULL;
5334 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5335 STMT_VINFO_DATA_REF (res) = NULL;
5336
5337 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5338 STMT_VINFO_DR_OFFSET (res) = NULL;
5339 STMT_VINFO_DR_INIT (res) = NULL;
5340 STMT_VINFO_DR_STEP (res) = NULL;
5341 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5342
5343 if (gimple_code (stmt) == GIMPLE_PHI
5344 && is_loop_header_bb_p (gimple_bb (stmt)))
5345 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5346 else
5347 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5348
5349 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5350 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5351 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5352 STMT_SLP_TYPE (res) = loop_vect;
5353 GROUP_FIRST_ELEMENT (res) = NULL;
5354 GROUP_NEXT_ELEMENT (res) = NULL;
5355 GROUP_SIZE (res) = 0;
5356 GROUP_STORE_COUNT (res) = 0;
5357 GROUP_GAP (res) = 0;
5358 GROUP_SAME_DR_STMT (res) = NULL;
5359 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5360
5361 return res;
5362}
5363
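/* For illustration, a typical use when the vectorizer initializes its
   per-statement bookkeeping (a sketch; the iteration over statements
   is elided):

     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));

   so that vinfo_for_stmt can later retrieve the fresh stmt_vec_info.  */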
5364
5365/* Create the vector that maps stmts to their stmt_vec_info. */
5366
5367void
5368init_stmt_vec_info_vec (void)
5369{
5370 gcc_assert (!stmt_vec_info_vec);
5371 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5372}
5373
5374
5375/* Free the vector that maps stmts to their stmt_vec_info. */
5376
5377void
5378free_stmt_vec_info_vec (void)
5379{
5380 gcc_assert (stmt_vec_info_vec);
5381 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5382}
5383
5384
5385/* Free stmt vectorization related info. */
5386
5387void
5388free_stmt_vec_info (gimple stmt)
5389{
5390 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5391
5392 if (!stmt_info)
5393 return;
5394
5395 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5396 set_vinfo_for_stmt (stmt, NULL);
5397 free (stmt_info);
5398}
5399
5400
5401/* Function get_vectype_for_scalar_type_and_size.
5402
5403 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5404 by the target. */
5405
5406static tree
5407get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5408{
5409 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5410 enum machine_mode simd_mode;
5411 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5412 int nunits;
5413 tree vectype;
5414
5415 if (nbytes == 0)
5416 return NULL_TREE;
5417
5418 /* We can't build a vector type of elements with alignment bigger than
5419 their size. */
5420 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5421 return NULL_TREE;
5422
5423 /* If we'd build a vector type of elements whose mode precision doesn't
5424 match their type's precision we'll get mismatched types on vector
5425 extracts via BIT_FIELD_REFs. This effectively means we disable
5426 vectorization of bool and/or enum types in some languages. */
5427 if (INTEGRAL_TYPE_P (scalar_type)
5428 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5429 return NULL_TREE;
5430
5431 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5432 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5433 return NULL_TREE;
5434
5435 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5436 When the component mode passes the above test simply use a type
5437 corresponding to that mode. The theory is that any use that
5438 would cause problems with this will disable vectorization anyway. */
5439 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5440 && !INTEGRAL_TYPE_P (scalar_type)
5441 && !POINTER_TYPE_P (scalar_type))
5442 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5443
5444 /* If no size was supplied use the mode the target prefers. Otherwise
5445 lookup a vector mode of the specified size. */
5446 if (size == 0)
5447 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5448 else
5449 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5450 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5451 if (nunits <= 1)
5452 return NULL_TREE;
5453
5454 vectype = build_vector_type (scalar_type, nunits);
5455 if (vect_print_dump_info (REPORT_DETAILS))
5456 {
5457 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5458 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5459 }
5460
5461 if (!vectype)
5462 return NULL_TREE;
5463
5464 if (vect_print_dump_info (REPORT_DETAILS))
5465 {
5466 fprintf (vect_dump, "vectype: ");
5467 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5468 }
5469
5470 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5471 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5472 {
5473 if (vect_print_dump_info (REPORT_DETAILS))
5474 fprintf (vect_dump, "mode not supported by target.");
5475 return NULL_TREE;
5476 }
5477
5478 return vectype;
5479}
5480
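/* For example, assuming SImode is 4 bytes wide:
   get_vectype_for_scalar_type_and_size (int, 16) looks up a 16-byte
   vector mode for SImode, finds V4SImode and returns the type
   "vector(4) int", while SIZE == 0 instead defers the choice to
   targetm.vectorize.preferred_simd_mode.  */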
5481unsigned int current_vector_size;
5482
5483/* Function get_vectype_for_scalar_type.
5484
5485 Returns the vector type corresponding to SCALAR_TYPE as supported
5486 by the target. */
5487
5488tree
5489get_vectype_for_scalar_type (tree scalar_type)
5490{
5491 tree vectype;
5492 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5493 current_vector_size);
5494 if (vectype
5495 && current_vector_size == 0)
5496 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5497 return vectype;
5498}
5499
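/* For example, on a target whose preferred SImode SIMD mode is 16 bytes
   wide, the first call get_vectype_for_scalar_type (int) returns
   "vector(4) int" and latches current_vector_size to 16; a later call
   for short then returns "vector(8) short", keeping all vector types
   chosen for the region the same size.  */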
5500/* Function get_same_sized_vectype
5501
5502 Returns a vector type corresponding to SCALAR_TYPE with the same
5503 size as VECTOR_TYPE, if supported by the target. */
5504
5505tree
5506get_same_sized_vectype (tree scalar_type, tree vector_type)
5507{
5508 return get_vectype_for_scalar_type_and_size
5509 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5510}
5511
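/* For example, given a VECTOR_TYPE of "vector(4) int" (16 bytes),
   get_same_sized_vectype (short_integer_type_node, that type) returns
   "vector(8) short" if the target supports it, and NULL_TREE
   otherwise.  */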
5512/* Function vect_is_simple_use.
5513
5514 Input:
5515 LOOP_VINFO - the vect info of the loop that is being vectorized.
5516 BB_VINFO - the vect info of the basic block that is being vectorized.
5517 OPERAND - operand of a stmt in the loop or bb.
5518 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5519
5520 Returns whether a stmt with OPERAND can be vectorized.
5521 For loops, supportable operands are constants, loop invariants, and operands
5522 that are defined by the current iteration of the loop. Unsupportable
5523 operands are those that are defined by a previous iteration of the loop (as
5524 is the case in reduction/induction computations).
5525 For basic blocks, supportable operands are constants and bb invariants.
5526 For now, operands defined outside the basic block are not supported. */
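/* E.g., for x_1 = a_2 * 3 inside the loop: the operand 3 is classified
   as vect_constant_def; a_2 defined by a statement inside the loop
   gives vect_internal_def; a_2 defined before the loop, or a function
   argument, gives vect_external_def.  */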
5527
5528bool
5529vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5530 bb_vec_info bb_vinfo, gimple *def_stmt,
5531 tree *def, enum vect_def_type *dt)
5532{
5533 basic_block bb;
5534 stmt_vec_info stmt_vinfo;
5535 struct loop *loop = NULL;
5536
5537 if (loop_vinfo)
5538 loop = LOOP_VINFO_LOOP (loop_vinfo);
5539
5540 *def_stmt = NULL;
5541 *def = NULL_TREE;
5542
5543 if (vect_print_dump_info (REPORT_DETAILS))
5544 {
5545 fprintf (vect_dump, "vect_is_simple_use: operand ");
5546 print_generic_expr (vect_dump, operand, TDF_SLIM);
5547 }
5548
5549 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5550 {
5551 *dt = vect_constant_def;
5552 return true;
5553 }
5554
5555 if (is_gimple_min_invariant (operand))
5556 {
5557 *def = operand;
5558 *dt = vect_external_def;
5559 return true;
5560 }
5561
5562 if (TREE_CODE (operand) == PAREN_EXPR)
5563 {
5564 if (vect_print_dump_info (REPORT_DETAILS))
5565 fprintf (vect_dump, "non-associatable copy.");
5566 operand = TREE_OPERAND (operand, 0);
5567 }
5568
5569 if (TREE_CODE (operand) != SSA_NAME)
5570 {
5571 if (vect_print_dump_info (REPORT_DETAILS))
5572 fprintf (vect_dump, "not ssa-name.");
5573 return false;
5574 }
5575
5576 *def_stmt = SSA_NAME_DEF_STMT (operand);
5577 if (*def_stmt == NULL)
5578 {
5579 if (vect_print_dump_info (REPORT_DETAILS))
5580 fprintf (vect_dump, "no def_stmt.");
5581 return false;
5582 }
5583
5584 if (vect_print_dump_info (REPORT_DETAILS))
5585 {
5586 fprintf (vect_dump, "def_stmt: ");
5587 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5588 }
5589
5590 /* Empty stmt is expected only in case of a function argument.
5591 (Otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
5592 if (gimple_nop_p (*def_stmt))
5593 {
5594 *def = operand;
5595 *dt = vect_external_def;
5596 return true;
5597 }
5598
5599 bb = gimple_bb (*def_stmt);
5600
5601 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5602 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5603 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5604 *dt = vect_external_def;
5605 else
5606 {
5607 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5608 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5609 }
5610
5611 if (*dt == vect_unknown_def_type)
5612 {
5613 if (vect_print_dump_info (REPORT_DETAILS))
5614 fprintf (vect_dump, "Unsupported pattern.");
5615 return false;
5616 }
5617
5618 if (vect_print_dump_info (REPORT_DETAILS))
5619 fprintf (vect_dump, "type of def: %d.",*dt);
5620
5621 switch (gimple_code (*def_stmt))
5622 {
5623 case GIMPLE_PHI:
5624 *def = gimple_phi_result (*def_stmt);
5625 break;
5626
5627 case GIMPLE_ASSIGN:
5628 *def = gimple_assign_lhs (*def_stmt);
5629 break;
5630
5631 case GIMPLE_CALL:
5632 *def = gimple_call_lhs (*def_stmt);
5633 if (*def != NULL)
5634 break;
5635 /* FALLTHRU */
5636 default:
5637 if (vect_print_dump_info (REPORT_DETAILS))
5638 fprintf (vect_dump, "unsupported defining stmt: ");
5639 return false;
5640 }
5641
5642 return true;
5643}
5644
5645/* Function vect_is_simple_use_1.
5646
5647 Same as vect_is_simple_use but also determines the vector operand
5648 type of OPERAND and stores it to *VECTYPE. If the definition of
5649 OPERAND is vect_uninitialized_def, vect_constant_def or
5650 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
5651 is responsible for computing the best suited vector type for the
5652 scalar operand. */
5653
5654bool
5655vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5656 bb_vec_info bb_vinfo, gimple *def_stmt,
5657 tree *def, enum vect_def_type *dt, tree *vectype)
5658{
5659 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5660 return false;
5661
5662 /* Now get a vector type if the def is internal, otherwise supply
5663 NULL_TREE and leave it up to the caller to figure out a proper
5664 type for the use stmt. */
5665 if (*dt == vect_internal_def
5666 || *dt == vect_induction_def
5667 || *dt == vect_reduction_def
5668 || *dt == vect_double_reduction_def
5669 || *dt == vect_nested_cycle)
5670 {
5671 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5672
5673 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5674 && !STMT_VINFO_RELEVANT (stmt_info)
5675 && !STMT_VINFO_LIVE_P (stmt_info))
5676 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5677
5678 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5679 gcc_assert (*vectype != NULL_TREE);
5680 }
5681 else if (*dt == vect_uninitialized_def
5682 || *dt == vect_constant_def
5683 || *dt == vect_external_def)
5684 *vectype = NULL_TREE;
5685 else
5686 gcc_unreachable ();
5687
5688 return true;
5689}
5690
5691
5692/* Function supportable_widening_operation
5693
5694 Check whether an operation represented by the code CODE is a
5695 widening operation that is supported by the target platform in
5696 vector form (i.e., when operating on arguments of type VECTYPE_IN
5697 producing a result of type VECTYPE_OUT).
5698
5699 Widening operations we currently support are NOP (CONVERT), FLOAT
5700 and WIDEN_MULT. This function checks if these operations are supported
5701 by the target platform either directly (via vector tree-codes), or via
5702 target builtins.
5703
5704 Output:
5705 - CODE1 and CODE2 are codes of vector operations to be used when
5706 vectorizing the operation, if available.
ebfd146a 5707 - DECL1 and DECL2 are decls of target builtin functions to be used
5708 when vectorizing the operation, if available. In this case,
5709 CODE1 and CODE2 are CALL_EXPR.
ebfd146a
IR
5710 - MULTI_STEP_CVT determines the number of required intermediate steps in
5711 case of multi-step conversion (like char->short->int - in that case
5712 MULTI_STEP_CVT will be 1).
b8698a0f
L
5713 - INTERM_TYPES contains the intermediate type required to perform the
5714 widening operation (short in the above example). */
ebfd146a
IR
5715
5716bool
b690cc0f
RG
5717supportable_widening_operation (enum tree_code code, gimple stmt,
5718 tree vectype_out, tree vectype_in,
ebfd146a
IR
5719 tree *decl1, tree *decl2,
5720 enum tree_code *code1, enum tree_code *code2,
5721 int *multi_step_cvt,
5722 VEC (tree, heap) **interm_types)
5723{
5724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5725 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5726 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
5727 bool ordered_p;
5728 enum machine_mode vec_mode;
81f40b79 5729 enum insn_code icode1, icode2;
ebfd146a 5730 optab optab1, optab2;
b690cc0f
RG
5731 tree vectype = vectype_in;
5732 tree wide_vectype = vectype_out;
ebfd146a
IR
5733 enum tree_code c1, c2;
5734
5735 /* The result of a vectorized widening operation usually requires two vectors
b8698a0f
L
5736 (because the widened results do not fit int one vector). The generated
5737 vector results would normally be expected to be generated in the same
ebfd146a
IR
5738 order as in the original scalar computation, i.e. if 8 results are
5739 generated in each vector iteration, they are to be organized as follows:
b8698a0f 5740 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
ebfd146a 5741
b8698a0f 5742 However, in the special case that the result of the widening operation is
ebfd146a 5743 used in a reduction computation only, the order doesn't matter (because
b8698a0f 5744 when vectorizing a reduction we change the order of the computation).
ebfd146a
IR
5745 Some targets can take advantage of this and generate more efficient code.
5746 For example, targets like Altivec, that support widen_mult using a sequence
5747 of {mult_even,mult_odd} generate the following vectors:
5748 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
5749
5750 When vectorizing outer-loops, we execute the inner-loop sequentially
b8698a0f 5751 (each vectorized inner-loop iteration contributes to VF outer-loop
ff802fa1 5752 iterations in parallel). We therefore don't allow to change the order
ebfd146a
IR
5753 of the computation in the inner-loop during outer-loop vectorization. */
5754
5755 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
5756 && !nested_in_vect_loop_p (vect_loop, stmt))
5757 ordered_p = false;
5758 else
5759 ordered_p = true;
5760
5761 if (!ordered_p
5762 && code == WIDEN_MULT_EXPR
5763 && targetm.vectorize.builtin_mul_widen_even
5764 && targetm.vectorize.builtin_mul_widen_even (vectype)
5765 && targetm.vectorize.builtin_mul_widen_odd
5766 && targetm.vectorize.builtin_mul_widen_odd (vectype))
5767 {
5768 if (vect_print_dump_info (REPORT_DETAILS))
5769 fprintf (vect_dump, "Unordered widening operation detected.");
5770
5771 *code1 = *code2 = CALL_EXPR;
5772 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
5773 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
5774 return true;
5775 }
5776
5777 switch (code)
5778 {
5779 case WIDEN_MULT_EXPR:
5780 if (BYTES_BIG_ENDIAN)
5781 {
5782 c1 = VEC_WIDEN_MULT_HI_EXPR;
5783 c2 = VEC_WIDEN_MULT_LO_EXPR;
5784 }
5785 else
5786 {
5787 c2 = VEC_WIDEN_MULT_HI_EXPR;
5788 c1 = VEC_WIDEN_MULT_LO_EXPR;
5789 }
5790 break;
5791
5792 CASE_CONVERT:
5793 if (BYTES_BIG_ENDIAN)
5794 {
5795 c1 = VEC_UNPACK_HI_EXPR;
5796 c2 = VEC_UNPACK_LO_EXPR;
5797 }
5798 else
5799 {
5800 c2 = VEC_UNPACK_HI_EXPR;
5801 c1 = VEC_UNPACK_LO_EXPR;
5802 }
5803 break;
5804
5805 case FLOAT_EXPR:
5806 if (BYTES_BIG_ENDIAN)
5807 {
5808 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
5809 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
5810 }
5811 else
5812 {
5813 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
5814 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
5815 }
5816 break;
5817
5818 case FIX_TRUNC_EXPR:
5819 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
5820 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
5821 computing the operation. */
5822 return false;
5823
5824 default:
5825 gcc_unreachable ();
5826 }
5827
5828 if (code == FIX_TRUNC_EXPR)
5829 {
5830 /* The signedness is determined from output operand. */
b690cc0f
RG
5831 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5832 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
5833 }
5834 else
5835 {
5836 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5837 optab2 = optab_for_tree_code (c2, vectype, optab_default);
5838 }
5839
5840 if (!optab1 || !optab2)
5841 return false;
5842
5843 vec_mode = TYPE_MODE (vectype);
947131ba
RS
5844 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
5845 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
5846 return false;
5847
b8698a0f 5848 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a
IR
5849 types. */
5850 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5851 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
5852 {
5853 int i;
5854 tree prev_type = vectype, intermediate_type;
5855 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5856 optab optab3, optab4;
5857
5858 if (!CONVERT_EXPR_CODE_P (code))
5859 return false;
b8698a0f 5860
ebfd146a
IR
5861 *code1 = c1;
5862 *code2 = c2;
b8698a0f 5863
ebfd146a 5864 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
ff802fa1
IR
5865 intermediate steps in promotion sequence. We try
5866 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5867 not. */
ebfd146a
IR
5868 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5869 for (i = 0; i < 3; i++)
5870 {
5871 intermediate_mode = insn_data[icode1].operand[0].mode;
5872 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5873 TYPE_UNSIGNED (prev_type));
5874 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
5875 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
5876
5877 if (!optab3 || !optab4
5878 || ((icode1 = optab_handler (optab1, prev_mode))
5879 == CODE_FOR_nothing)
ebfd146a 5880 || insn_data[icode1].operand[0].mode != intermediate_mode
5881 || ((icode2 = optab_handler (optab2, prev_mode))
5882 == CODE_FOR_nothing)
ebfd146a 5883 || insn_data[icode2].operand[0].mode != intermediate_mode
5884 || ((icode1 = optab_handler (optab3, intermediate_mode))
5885 == CODE_FOR_nothing)
5886 || ((icode2 = optab_handler (optab4, intermediate_mode))
5887 == CODE_FOR_nothing))
5888 return false;
5889
5890 VEC_quick_push (tree, *interm_types, intermediate_type);
5891 (*multi_step_cvt)++;
5892
5893 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
5894 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5895 return true;
5896
5897 prev_type = intermediate_type;
5898 prev_mode = intermediate_mode;
5899 }
5900
5901 return false;
5902 }
5903
5904 *code1 = c1;
5905 *code2 = c2;
5906 return true;
5907}
5908
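/* Worked example of the multi-step case above, for char -> int with
   16-byte vectors: V16QI unpacks to V8HI in one step and V8HI to V4SI
   in another, so the V16QI -> V4SI conversion pushes the V8HI vector
   type onto *INTERM_TYPES and returns with *MULTI_STEP_CVT == 1.  */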
5909
5910/* Function supportable_narrowing_operation
5911
5912 Check whether an operation represented by the code CODE is a
5913 narrowing operation that is supported by the target platform in
5914 vector form (i.e., when operating on arguments of type VECTYPE_IN
5915 and producing a result of type VECTYPE_OUT).
5916
5917 Narrowing operations we currently support are NOP (CONVERT) and
5918 FIX_TRUNC. This function checks if these operations are supported by
5919 the target platform directly via vector tree-codes.
5920
5921 Output:
5922 - CODE1 is the code of a vector operation to be used when
5923 vectorizing the operation, if available.
5924 - MULTI_STEP_CVT determines the number of required intermediate steps in
5925 case of multi-step conversion (like int->short->char - in that case
5926 MULTI_STEP_CVT will be 1).
5927 - INTERM_TYPES contains the intermediate type required to perform the
5928 narrowing operation (short in the above example). */
5929
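/* For example, when narrowing shorts to chars, CODE1 is
   VEC_PACK_TRUNC_EXPR: it truncates the elements of two V8HI input
   vectors and packs them into a single V16QI, the converse of the
   widening case above where one input yields two outputs.  */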
5930bool
5931supportable_narrowing_operation (enum tree_code code,
5932 tree vectype_out, tree vectype_in,
5933 enum tree_code *code1, int *multi_step_cvt,
5934 VEC (tree, heap) **interm_types)
5935{
5936 enum machine_mode vec_mode;
5937 enum insn_code icode1;
5938 optab optab1, interm_optab;
5939 tree vectype = vectype_in;
5940 tree narrow_vectype = vectype_out;
5941 enum tree_code c1;
5942 tree intermediate_type, prev_type;
5943 int i;
5944
5945 switch (code)
5946 {
5947 CASE_CONVERT:
5948 c1 = VEC_PACK_TRUNC_EXPR;
5949 break;
5950
5951 case FIX_TRUNC_EXPR:
5952 c1 = VEC_PACK_FIX_TRUNC_EXPR;
5953 break;
5954
5955 case FLOAT_EXPR:
5956 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
5957 tree code and optabs used for computing the operation. */
5958 return false;
5959
5960 default:
5961 gcc_unreachable ();
5962 }
5963
5964 if (code == FIX_TRUNC_EXPR)
5965 /* The signedness is determined from output operand. */
5966 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
5967 else
5968 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5969
5970 if (!optab1)
5971 return false;
5972
5973 vec_mode = TYPE_MODE (vectype);
5974 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
5975 return false;
5976
5977 /* Check if it's a multi-step conversion that can be done using intermediate
5978 types. */
5979 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5980 {
5981 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5982
5983 *code1 = c1;
5984 prev_type = vectype;
5985 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5986 intermediate steps in the narrowing sequence. We try
5987 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
5988 not. */
5989 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5990 for (i = 0; i < 3; i++)
5991 {
5992 intermediate_mode = insn_data[icode1].operand[0].mode;
5993 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5994 TYPE_UNSIGNED (prev_type));
5995 interm_optab = optab_for_tree_code (c1, intermediate_type,
5996 optab_default);
5997 if (!interm_optab
5998 || ((icode1 = optab_handler (optab1, prev_mode))
5999 == CODE_FOR_nothing)
6000 || insn_data[icode1].operand[0].mode != intermediate_mode
6001 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6002 == CODE_FOR_nothing))
6003 return false;
6004
6005 VEC_quick_push (tree, *interm_types, intermediate_type);
6006 (*multi_step_cvt)++;
6007
6008 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6009 return true;
6010
6011 prev_type = intermediate_type;
6012 prev_mode = intermediate_mode;
6013 }
6014
6015 return false;
6016 }
6017
6018 *code1 = c1;
6019 return true;
6020}