1/* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "tree-pretty-print.h"
32#include "gimple-pretty-print.h"
33#include "tree-flow.h"
34#include "tree-dump.h"
35#include "cfgloop.h"
36#include "cfglayout.h"
37#include "expr.h"
38#include "recog.h"
39#include "optabs.h"
40#include "diagnostic-core.h"
41#include "tree-vectorizer.h"
42#include "langhooks.h"
43
44
45/* Return a variable of type ELEM_TYPE[NELEMS]. */
46
47static tree
48create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49{
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
52}
53
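/* For example, with a V4SI ELEM_TYPE and NELEMS == 2, create_vector_array
   returns a "vect_array" temporary of type V4SI[2]; read_vector_array and
   write_vector_array below then move individual V4SI values in and out of
   index N of that array.  */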
54/* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
58
59static tree
60read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
62{
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
65
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
72
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
78
79 return vect_name;
80}
81
82/* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
85
86static void
87write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
89{
90 tree array_ref;
91 gimple new_stmt;
92
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
96
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
100}
101
102/* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
105
106static tree
107create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
108{
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
111
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
119}
120
121/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
122
123/* Function vect_mark_relevant.
124
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
126
127static void
128vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
131{
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
136
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
139
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern, in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
145 {
146 bool found = false;
147 if (!used_in_pattern)
148 {
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
158
159 /* This use is outside the pattern; if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 if (TREE_CODE (lhs) == SSA_NAME)
163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
164 {
165 if (is_gimple_debug (USE_STMT (use_p)))
166 continue;
167 use_stmt = USE_STMT (use_p);
168
169 if (vinfo_for_stmt (use_stmt)
170 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
171 {
172 found = true;
173 break;
174 }
175 }
176 }
177
178 if (!found)
179 {
180 /* This is the last stmt in a sequence that was detected as a
181 pattern that can potentially be vectorized. Don't mark the stmt
182 as relevant/live because it's not going to be vectorized.
183 Instead mark the pattern-stmt that replaces it. */
184
185 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "last stmt in pattern. don't mark"
189 " relevant/live.");
190 stmt_info = vinfo_for_stmt (pattern_stmt);
191 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
194 stmt = pattern_stmt;
195 }
196 }
197
198 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200 STMT_VINFO_RELEVANT (stmt_info) = relevant;
201
202 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 {
205 if (vect_print_dump_info (REPORT_DETAILS))
206 fprintf (vect_dump, "already marked relevant/live.");
207 return;
208 }
209
210 VEC_safe_push (gimple, heap, *worklist, stmt);
211}
212
213
214/* Function vect_stmt_relevant_p.
215
216 Return true if STMT in loop that is represented by LOOP_VINFO is
217 "relevant for vectorization".
218
219 A stmt is considered "relevant for vectorization" if:
220 - it has uses outside the loop.
221 - it has vdefs (it alters memory).
222 - control stmts in the loop (except for the exit condition).
223
224 CHECKME: what other side effects would the vectorizer allow? */
225
226static bool
227vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 enum vect_relevant *relevant, bool *live_p)
229{
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 ssa_op_iter op_iter;
232 imm_use_iterator imm_iter;
233 use_operand_p use_p;
234 def_operand_p def_p;
235
236 *relevant = vect_unused_in_scope;
237 *live_p = false;
238
239 /* cond stmt other than loop exit cond. */
240 if (is_ctrl_stmt (stmt)
241 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242 != loop_exit_ctrl_vec_info_type)
243 *relevant = vect_used_in_scope;
244
245 /* changing memory. */
246 if (gimple_code (stmt) != GIMPLE_PHI)
247 if (gimple_vdef (stmt))
248 {
249 if (vect_print_dump_info (REPORT_DETAILS))
250 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
251 *relevant = vect_used_in_scope;
252 }
253
254 /* uses outside the loop. */
255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 {
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 {
259 basic_block bb = gimple_bb (USE_STMT (use_p));
260 if (!flow_bb_inside_loop_p (loop, bb))
261 {
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
264
265 if (is_gimple_debug (USE_STMT (use_p)))
266 continue;
267
268 /* We expect all such uses to be in the loop exit phis
269 (because of loop closed form) */
270 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271 gcc_assert (bb == single_exit (loop)->dest);
272
273 *live_p = true;
274 }
275 }
276 }
277
278 return (*live_p || *relevant);
279}
280
281
282/* Function exist_non_indexing_operands_for_use_p
283
284 USE is one of the uses attached to STMT. Check if USE is
285 used in STMT for anything other than indexing an array. */
286
287static bool
288exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
289{
290 tree operand;
291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292
293 /* USE corresponds to some operand in STMT. If there is no data
294 reference in STMT, then any operand that corresponds to USE
295 is not indexing an array. */
296 if (!STMT_VINFO_DATA_REF (stmt_info))
297 return true;
298
299 /* STMT has a data_ref. FORNOW this means that it's of one of
300 the following forms:
301 -1- ARRAY_REF = var
302 -2- var = ARRAY_REF
303 (This should have been verified in analyze_data_refs).
304
305 'var' in the second case corresponds to a def, not a use,
306 so USE cannot correspond to any operands that are not used
307 for array indexing.
308
309 Therefore, all we need to check is if STMT falls into the
310 first case, and whether var corresponds to USE. */
311
312 if (!gimple_assign_copy_p (stmt))
313 return false;
314 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
315 return false;
316 operand = gimple_assign_rhs1 (stmt);
317 if (TREE_CODE (operand) != SSA_NAME)
318 return false;
319
320 if (operand == use)
321 return true;
322
323 return false;
324}
325
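/* For example, given the store "a[i_1] = x_2", the use of x_2 is the stored
   value, so the function returns true for it, while the use of i_1 only
   feeds the array index and yields false.  For the load "x_2 = a[i_1]" it
   likewise returns false for i_1, since the only non-indexing operand in
   that form is a def rather than a use.  */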
326
327/*
328 Function process_use.
329
330 Inputs:
331 - a USE in STMT in a loop represented by LOOP_VINFO
332 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333 that defined USE. This is done by calling mark_relevant and passing it
334 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
335 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
336 be performed.
337
338 Outputs:
339 Generally, LIVE_P and RELEVANT are used to define the liveness and
340 relevance info of the DEF_STMT of this USE:
341 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
342 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
343 Exceptions:
344 - case 1: If USE is used only for address computations (e.g. array indexing),
345 which does not need to be directly vectorized, then the liveness/relevance
346 of the respective DEF_STMT is left unchanged.
347 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
348 skip DEF_STMT because it has already been processed.
349 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
350 be modified accordingly.
351
352 Return true if everything is as expected. Return false otherwise. */
353
354static bool
355process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
356 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
357 bool force)
358{
359 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
360 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
361 stmt_vec_info dstmt_vinfo;
362 basic_block bb, def_bb;
363 tree def;
364 gimple def_stmt;
365 enum vect_def_type dt;
366
367 /* case 1: we are only interested in uses that need to be vectorized. Uses
368 that are used for address computation are not considered relevant. */
369 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
370 return true;
371
372 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
373 {
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
375 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
376 return false;
377 }
378
379 if (!def_stmt || gimple_nop_p (def_stmt))
380 return true;
381
382 def_bb = gimple_bb (def_stmt);
383 if (!flow_bb_inside_loop_p (loop, def_bb))
384 {
385 if (vect_print_dump_info (REPORT_DETAILS))
386 fprintf (vect_dump, "def_stmt is out of loop.");
387 return true;
388 }
389
390 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
391 DEF_STMT must have already been processed, because this should be the
392 only way that STMT, which is a reduction-phi, was put in the worklist,
393 as there should be no other uses for DEF_STMT in the loop. So we just
394 check that everything is as expected, and we are done. */
395 dstmt_vinfo = vinfo_for_stmt (def_stmt);
396 bb = gimple_bb (stmt);
397 if (gimple_code (stmt) == GIMPLE_PHI
398 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
399 && gimple_code (def_stmt) != GIMPLE_PHI
400 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
401 && bb->loop_father == def_bb->loop_father)
402 {
403 if (vect_print_dump_info (REPORT_DETAILS))
404 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
405 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
406 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
407 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
408 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
409 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
410 return true;
411 }
412
413 /* case 3a: outer-loop stmt defining an inner-loop stmt:
414 outer-loop-header-bb:
415 d = def_stmt
416 inner-loop:
417 stmt # use (d)
418 outer-loop-tail-bb:
419 ... */
420 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
421 {
422 if (vect_print_dump_info (REPORT_DETAILS))
423 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
424
425 switch (relevant)
426 {
427 case vect_unused_in_scope:
428 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
429 vect_used_in_scope : vect_unused_in_scope;
430 break;
431
432 case vect_used_in_outer_by_reduction:
433 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
434 relevant = vect_used_by_reduction;
435 break;
436
437 case vect_used_in_outer:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 relevant = vect_used_in_scope;
440 break;
441
442 case vect_used_in_scope:
443 break;
444
445 default:
446 gcc_unreachable ();
447 }
448 }
449
450 /* case 3b: inner-loop stmt defining an outer-loop stmt:
451 outer-loop-header-bb:
452 ...
453 inner-loop:
454 d = def_stmt
455 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
456 stmt # use (d) */
457 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
458 {
459 if (vect_print_dump_info (REPORT_DETAILS))
460 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
461
462 switch (relevant)
463 {
464 case vect_unused_in_scope:
465 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
466 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
467 vect_used_in_outer_by_reduction : vect_unused_in_scope;
468 break;
469
470 case vect_used_by_reduction:
471 relevant = vect_used_in_outer_by_reduction;
472 break;
473
474 case vect_used_in_scope:
475 relevant = vect_used_in_outer;
476 break;
477
478 default:
479 gcc_unreachable ();
480 }
481 }
482
483 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
484 is_pattern_stmt_p (stmt_vinfo));
485 return true;
486}
487
488
489/* Function vect_mark_stmts_to_be_vectorized.
490
491 Not all stmts in the loop need to be vectorized. For example:
492
493 for i...
494 for j...
495 1. T0 = i + j
496 2. T1 = a[T0]
497
498 3. j = j + 1
499
500 Stmt 1 and 3 do not need to be vectorized, because loop control and
501 addressing of vectorized data-refs are handled differently.
502
503 This pass detects such stmts. */
504
505bool
506vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
507{
508 VEC(gimple,heap) *worklist;
509 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
510 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
511 unsigned int nbbs = loop->num_nodes;
512 gimple_stmt_iterator si;
513 gimple stmt;
514 unsigned int i;
515 stmt_vec_info stmt_vinfo;
516 basic_block bb;
517 gimple phi;
518 bool live_p;
519 enum vect_relevant relevant, tmp_relevant;
520 enum vect_def_type def_type;
521
522 if (vect_print_dump_info (REPORT_DETAILS))
523 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
524
525 worklist = VEC_alloc (gimple, heap, 64);
526
527 /* 1. Init worklist. */
528 for (i = 0; i < nbbs; i++)
529 {
530 bb = bbs[i];
531 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
532 {
533 phi = gsi_stmt (si);
534 if (vect_print_dump_info (REPORT_DETAILS))
535 {
536 fprintf (vect_dump, "init: phi relevant? ");
537 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
538 }
539
540 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
541 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
542 }
543 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
544 {
545 stmt = gsi_stmt (si);
546 if (vect_print_dump_info (REPORT_DETAILS))
547 {
548 fprintf (vect_dump, "init: stmt relevant? ");
549 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
550 }
551
552 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
553 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
554 }
555 }
556
557 /* 2. Process_worklist */
558 while (VEC_length (gimple, worklist) > 0)
559 {
560 use_operand_p use_p;
561 ssa_op_iter iter;
562
563 stmt = VEC_pop (gimple, worklist);
564 if (vect_print_dump_info (REPORT_DETAILS))
565 {
566 fprintf (vect_dump, "worklist: examine stmt: ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
568 }
569
570 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
571 (DEF_STMT) as relevant/irrelevant and live/dead according to the
572 liveness and relevance properties of STMT. */
573 stmt_vinfo = vinfo_for_stmt (stmt);
574 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
575 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
576
577 /* Generally, the liveness and relevance properties of STMT are
578 propagated as is to the DEF_STMTs of its USEs:
579 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
580 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
581
582 One exception is when STMT has been identified as defining a reduction
583 variable; in this case we set the liveness/relevance as follows:
584 live_p = false
585 relevant = vect_used_by_reduction
586 This is because we distinguish between two kinds of relevant stmts -
587 those that are used by a reduction computation, and those that are
588 (also) used by a regular computation. This allows us later on to
589 identify stmts that are used solely by a reduction, and therefore the
590 order of the results that they produce does not have to be kept. */
591
592 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
593 tmp_relevant = relevant;
594 switch (def_type)
595 {
596 case vect_reduction_def:
597 switch (tmp_relevant)
598 {
599 case vect_unused_in_scope:
600 relevant = vect_used_by_reduction;
601 break;
602
603 case vect_used_by_reduction:
604 if (gimple_code (stmt) == GIMPLE_PHI)
605 break;
606 /* fall through */
607
608 default:
609 if (vect_print_dump_info (REPORT_DETAILS))
610 fprintf (vect_dump, "unsupported use of reduction.");
611
612 VEC_free (gimple, heap, worklist);
613 return false;
614 }
615
616 live_p = false;
617 break;
618
619 case vect_nested_cycle:
620 if (tmp_relevant != vect_unused_in_scope
621 && tmp_relevant != vect_used_in_outer_by_reduction
622 && tmp_relevant != vect_used_in_outer)
623 {
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "unsupported use of nested cycle.");
626
627 VEC_free (gimple, heap, worklist);
628 return false;
629 }
630
631 live_p = false;
632 break;
633
634 case vect_double_reduction_def:
635 if (tmp_relevant != vect_unused_in_scope
636 && tmp_relevant != vect_used_by_reduction)
637 {
638 if (vect_print_dump_info (REPORT_DETAILS))
639 fprintf (vect_dump, "unsupported use of double reduction.");
640
641 VEC_free (gimple, heap, worklist);
642 return false;
643 }
644
645 live_p = false;
646 break;
647
648 default:
649 break;
650 }
651
652 if (is_pattern_stmt_p (stmt_vinfo))
653 {
654 /* Pattern statements are not inserted into the code, so
655 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
656 have to scan the RHS or function arguments instead. */
657 if (is_gimple_assign (stmt))
658 {
659 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
660 tree op = gimple_assign_rhs1 (stmt);
661
662 i = 1;
663 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
664 {
665 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
666 live_p, relevant, &worklist, false)
667 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
668 live_p, relevant, &worklist, false))
669 {
670 VEC_free (gimple, heap, worklist);
671 return false;
672 }
673 i = 2;
674 }
675 for (; i < gimple_num_ops (stmt); i++)
676 {
677 op = gimple_op (stmt, i);
678 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
679 &worklist, false))
680 {
681 VEC_free (gimple, heap, worklist);
682 return false;
683 }
684 }
685 }
686 else if (is_gimple_call (stmt))
687 {
688 for (i = 0; i < gimple_call_num_args (stmt); i++)
689 {
690 tree arg = gimple_call_arg (stmt, i);
691 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
692 &worklist, false))
693 {
694 VEC_free (gimple, heap, worklist);
695 return false;
696 }
697 }
698 }
699 }
700 else
701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
702 {
703 tree op = USE_FROM_PTR (use_p);
704 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
705 &worklist, false))
706 {
707 VEC_free (gimple, heap, worklist);
708 return false;
709 }
710 }
711
712 if (STMT_VINFO_GATHER_P (stmt_vinfo))
713 {
714 tree off;
715 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
716 gcc_assert (decl);
717 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
718 &worklist, true))
719 {
720 VEC_free (gimple, heap, worklist);
721 return false;
722 }
723 }
724 } /* while worklist */
725
726 VEC_free (gimple, heap, worklist);
727 return true;
728}
729
730
731/* Get cost by calling cost target builtin. */
732
733static inline
734int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
735{
736 tree dummy_type = NULL;
737 int dummy = 0;
738
739 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
740 dummy_type, dummy);
741}
742
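/* Note: the weight returned above comes from the target's
   TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST hook; a target that does not
   override the hook gets the generic default, which typically charges
   unaligned accesses more than aligned vector statements.  */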
743
744/* Get cost for STMT. */
745
746int
747cost_for_stmt (gimple stmt)
748{
749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
750
751 switch (STMT_VINFO_TYPE (stmt_info))
752 {
753 case load_vec_info_type:
754 return vect_get_stmt_cost (scalar_load);
755 case store_vec_info_type:
756 return vect_get_stmt_cost (scalar_store);
757 case op_vec_info_type:
758 case condition_vec_info_type:
759 case assignment_vec_info_type:
760 case reduc_vec_info_type:
761 case induc_vec_info_type:
762 case type_promotion_vec_info_type:
763 case type_demotion_vec_info_type:
764 case type_conversion_vec_info_type:
765 case call_vec_info_type:
766 return vect_get_stmt_cost (scalar_stmt);
767 case undef_vec_info_type:
768 default:
769 gcc_unreachable ();
770 }
771}
772
773/* Function vect_model_simple_cost.
774
775 Models cost for simple operations, i.e. those that only emit ncopies of a
776 single op. Right now, this does not account for multiple insns that could
777 be generated for the single vector op. We will handle that shortly. */
778
779void
780vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
781 enum vect_def_type *dt, slp_tree slp_node)
782{
783 int i;
784 int inside_cost = 0, outside_cost = 0;
785
786 /* The SLP costs were already calculated during SLP tree build. */
787 if (PURE_SLP_STMT (stmt_info))
788 return;
789
790 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
791
792 /* FORNOW: Assuming maximum 2 args per stmts. */
793 for (i = 0; i < 2; i++)
794 {
795 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
796 outside_cost += vect_get_stmt_cost (vector_stmt);
797 }
798
799 if (vect_print_dump_info (REPORT_COST))
800 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
801 "outside_cost = %d .", inside_cost, outside_cost);
802
803 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
804 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
805 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
806}
807
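/* For instance, a single two-operand vector operation with ncopies == 2 and
   both operands defined inside the loop is charged 2 vector_stmt units of
   inside cost and no outside cost; each constant or loop-invariant operand
   would add one vector_stmt unit of outside cost for building its vector.  */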
808
809/* Function vect_cost_strided_group_size
810
811 For strided load or store, return the group_size only if it is the first
812 load or store of a group, else return 1. This ensures that group size is
813 only returned once per group. */
814
815static int
816vect_cost_strided_group_size (stmt_vec_info stmt_info)
817{
818 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
819
820 if (first_stmt == STMT_VINFO_STMT (stmt_info))
821 return GROUP_SIZE (stmt_info);
822
823 return 1;
824}
825
826
827/* Function vect_model_store_cost
828
829 Models cost for stores. In the case of strided accesses, one access
830 has the overhead of the strided access attributed to it. */
831
832void
833vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
834 bool store_lanes_p, enum vect_def_type dt,
835 slp_tree slp_node)
836{
837 int group_size;
838 unsigned int inside_cost = 0, outside_cost = 0;
839 struct data_reference *first_dr;
840 gimple first_stmt;
841
842 /* The SLP costs were already calculated during SLP tree build. */
843 if (PURE_SLP_STMT (stmt_info))
844 return;
845
846 if (dt == vect_constant_def || dt == vect_external_def)
847 outside_cost = vect_get_stmt_cost (scalar_to_vec);
848
849 /* Strided access? */
850 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
851 {
852 if (slp_node)
853 {
854 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
855 group_size = 1;
856 }
857 else
858 {
859 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
860 group_size = vect_cost_strided_group_size (stmt_info);
861 }
862
863 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
864 }
865 /* Not a strided access. */
866 else
867 {
868 group_size = 1;
869 first_dr = STMT_VINFO_DATA_REF (stmt_info);
870 }
871
872 /* We assume that the cost of a single store-lanes instruction is
873 equivalent to the cost of GROUP_SIZE separate stores. If a strided
874 access is instead being provided by a permute-and-store operation,
875 include the cost of the permutes. */
876 if (!store_lanes_p && group_size > 1)
877 {
878 /* Uses a high and low interleave operation for each needed permute. */
879 inside_cost = ncopies * exact_log2(group_size) * group_size
880 * vect_get_stmt_cost (vector_stmt);
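      /* For instance, with ncopies == 1 and group_size == 4 this charges
         exact_log2 (4) == 2 interleave stages of 4 vector stmts each,
         i.e. 8 vector_stmt units for the permutation.  */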
881
882 if (vect_print_dump_info (REPORT_COST))
883 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
884 group_size);
885
886 }
887
888 /* Costs of the stores. */
889 vect_get_store_cost (first_dr, ncopies, &inside_cost);
890
891 if (vect_print_dump_info (REPORT_COST))
892 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
893 "outside_cost = %d .", inside_cost, outside_cost);
894
895 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
896 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
897 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
898}
899
900
901/* Calculate cost of DR's memory access. */
902void
903vect_get_store_cost (struct data_reference *dr, int ncopies,
904 unsigned int *inside_cost)
905{
906 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
907
908 switch (alignment_support_scheme)
909 {
910 case dr_aligned:
911 {
912 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
913
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: aligned.");
916
917 break;
918 }
919
920 case dr_unaligned_supported:
921 {
922 gimple stmt = DR_STMT (dr);
923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
925
926 /* Here, we assign an additional cost for the unaligned store. */
927 *inside_cost += ncopies
928 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
929 vectype, DR_MISALIGNMENT (dr));
930
931 if (vect_print_dump_info (REPORT_COST))
932 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
933 "hardware.");
934
935 break;
936 }
937
938 default:
939 gcc_unreachable ();
940 }
941}
942
943
944/* Function vect_model_load_cost
945
946 Models cost for loads. In the case of strided accesses, the last access
947 has the overhead of the strided access attributed to it. Since unaligned
948 accesses are supported for loads, we also account for the costs of the
949 access scheme chosen. */
950
951void
952vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
953 slp_tree slp_node)
954{
955 int group_size;
956 gimple first_stmt;
957 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
958 unsigned int inside_cost = 0, outside_cost = 0;
959
960 /* The SLP costs were already calculated during SLP tree build. */
961 if (PURE_SLP_STMT (stmt_info))
962 return;
963
964 /* Strided accesses? */
965 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
966 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
967 {
968 group_size = vect_cost_strided_group_size (stmt_info);
969 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
970 }
971 /* Not a strided access. */
972 else
973 {
974 group_size = 1;
975 first_dr = dr;
976 }
977
978 /* We assume that the cost of a single load-lanes instruction is
979 equivalent to the cost of GROUP_SIZE separate loads. If a strided
980 access is instead being provided by a load-and-permute operation,
981 include the cost of the permutes. */
982 if (!load_lanes_p && group_size > 1)
983 {
984 /* Uses even and odd extract operations for each needed permute. */
985 inside_cost = ncopies * exact_log2(group_size) * group_size
986 * vect_get_stmt_cost (vector_stmt);
987
988 if (vect_print_dump_info (REPORT_COST))
989 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
990 group_size);
991 }
992
993 /* The loads themselves. */
994 vect_get_load_cost (first_dr, ncopies,
995 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
996 || slp_node),
997 &inside_cost, &outside_cost);
998
999 if (vect_print_dump_info (REPORT_COST))
1000 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1001 "outside_cost = %d .", inside_cost, outside_cost);
1002
1003 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1004 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1005 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1006}
1007
1008
1009/* Calculate cost of DR's memory access. */
1010void
1011vect_get_load_cost (struct data_reference *dr, int ncopies,
1012 bool add_realign_cost, unsigned int *inside_cost,
1013 unsigned int *outside_cost)
1014{
1015 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1016
1017 switch (alignment_support_scheme)
1018 {
1019 case dr_aligned:
1020 {
1021 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1022
1023 if (vect_print_dump_info (REPORT_COST))
1024 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1025
1026 break;
1027 }
1028 case dr_unaligned_supported:
1029 {
1030 gimple stmt = DR_STMT (dr);
1031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1033
1034 /* Here, we assign an additional cost for the unaligned load. */
1035 *inside_cost += ncopies
1036 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1037 vectype, DR_MISALIGNMENT (dr));
1038 if (vect_print_dump_info (REPORT_COST))
1039 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1040 "hardware.");
1041
1042 break;
1043 }
1044 case dr_explicit_realign:
1045 {
1046 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1047 + vect_get_stmt_cost (vector_stmt));
1048
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 outside costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += vect_get_stmt_cost (vector_stmt);
1054
1055 break;
1056 }
1057 case dr_explicit_realign_optimized:
1058 {
1059 if (vect_print_dump_info (REPORT_COST))
1060 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1061 "pipelined.");
1062
1063 /* Unaligned software pipeline has a load of an address, an initial
1064 load, and possibly a mask operation to "prime" the loop. However,
1065 if this is an access in a group of loads, which provide strided
1066 access, then the above cost should only be considered for one
1067 access in the group. Inside the loop, there is a load op
1068 and a realignment op. */
1069
1070 if (add_realign_cost)
1071 {
1072 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1073 if (targetm.vectorize.builtin_mask_for_load)
1074 *outside_cost += vect_get_stmt_cost (vector_stmt);
1075 }
1076
1077 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1078 + vect_get_stmt_cost (vector_stmt));
1079 break;
1080 }
1081
1082 default:
1083 gcc_unreachable ();
1084 }
1085}
1086
1087
1088/* Function vect_init_vector.
1089
1090 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1091 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1092 is not NULL. Otherwise, place the initialization at the loop preheader.
1093 Return the DEF of INIT_STMT.
1094 It will be used in the vectorization of STMT. */
1095
1096tree
1097vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1098 gimple_stmt_iterator *gsi)
1099{
1100 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1101 tree new_var;
1102 gimple init_stmt;
1103 tree vec_oprnd;
1104 edge pe;
1105 tree new_temp;
1106 basic_block new_bb;
1107
1108 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1109 add_referenced_var (new_var);
1110 init_stmt = gimple_build_assign (new_var, vector_var);
1111 new_temp = make_ssa_name (new_var, init_stmt);
1112 gimple_assign_set_lhs (init_stmt, new_temp);
1113
1114 if (gsi)
1115 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1116 else
1117 {
1118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1119
1120 if (loop_vinfo)
1121 {
1122 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1123
1124 if (nested_in_vect_loop_p (loop, stmt))
1125 loop = loop->inner;
1126
1127 pe = loop_preheader_edge (loop);
1128 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1129 gcc_assert (!new_bb);
1130 }
1131 else
1132 {
1133 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1134 basic_block bb;
1135 gimple_stmt_iterator gsi_bb_start;
1136
1137 gcc_assert (bb_vinfo);
1138 bb = BB_VINFO_BB (bb_vinfo);
1139 gsi_bb_start = gsi_after_labels (bb);
1140 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1141 }
1142 }
1143
1144 if (vect_print_dump_info (REPORT_DETAILS))
1145 {
1146 fprintf (vect_dump, "created new init_stmt: ");
1147 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1148 }
1149
1150 vec_oprnd = gimple_assign_lhs (init_stmt);
1151 return vec_oprnd;
1152}
1153
1154
1155/* Function vect_get_vec_def_for_operand.
1156
1157 OP is an operand in STMT. This function returns a (vector) def that will be
1158 used in the vectorized stmt for STMT.
1159
1160 In the case that OP is an SSA_NAME which is defined in the loop, then
1161 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1162
1163 In case OP is an invariant or constant, a new stmt that creates a vector def
1164 needs to be introduced. */
1165
1166tree
1167vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1168{
1169 tree vec_oprnd;
1170 gimple vec_stmt;
1171 gimple def_stmt;
1172 stmt_vec_info def_stmt_info = NULL;
1173 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1174 unsigned int nunits;
1175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1176 tree vec_inv;
1177 tree vec_cst;
1178 tree t = NULL_TREE;
1179 tree def;
1180 int i;
1181 enum vect_def_type dt;
1182 bool is_simple_use;
1183 tree vector_type;
1184
1185 if (vect_print_dump_info (REPORT_DETAILS))
1186 {
1187 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1188 print_generic_expr (vect_dump, op, TDF_SLIM);
1189 }
1190
1191 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1192 &dt);
1193 gcc_assert (is_simple_use);
1194 if (vect_print_dump_info (REPORT_DETAILS))
1195 {
1196 if (def)
1197 {
1198 fprintf (vect_dump, "def = ");
1199 print_generic_expr (vect_dump, def, TDF_SLIM);
1200 }
1201 if (def_stmt)
1202 {
1203 fprintf (vect_dump, " def_stmt = ");
1204 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1205 }
1206 }
1207
1208 switch (dt)
1209 {
1210 /* Case 1: operand is a constant. */
1211 case vect_constant_def:
1212 {
1213 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1214 gcc_assert (vector_type);
1215 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1216
1217 if (scalar_def)
1218 *scalar_def = op;
1219
1220 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1221 if (vect_print_dump_info (REPORT_DETAILS))
1222 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1223
1224 vec_cst = build_vector_from_val (vector_type,
1225 fold_convert (TREE_TYPE (vector_type),
1226 op));
1227 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1228 }
1229
1230 /* Case 2: operand is defined outside the loop - loop invariant. */
1231 case vect_external_def:
1232 {
1233 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1234 gcc_assert (vector_type);
1235 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1236
1237 if (scalar_def)
1238 *scalar_def = def;
1239
1240 /* Create 'vec_inv = {inv,inv,..,inv}' */
1241 if (vect_print_dump_info (REPORT_DETAILS))
1242 fprintf (vect_dump, "Create vector_inv.");
1243
1244 for (i = nunits - 1; i >= 0; --i)
1245 {
1246 t = tree_cons (NULL_TREE, def, t);
1247 }
1248
1249 /* FIXME: use build_constructor directly. */
1250 vec_inv = build_constructor_from_list (vector_type, t);
1251 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1252 }
1253
1254 /* Case 3: operand is defined inside the loop. */
1255 case vect_internal_def:
1256 {
1257 if (scalar_def)
1258 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1259
1260 /* Get the def from the vectorized stmt. */
1261 def_stmt_info = vinfo_for_stmt (def_stmt);
1262
1263 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1264 /* Get vectorized pattern statement. */
1265 if (!vec_stmt
1266 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1267 && !STMT_VINFO_RELEVANT (def_stmt_info))
1268 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1269 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1270 gcc_assert (vec_stmt);
1271 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1272 vec_oprnd = PHI_RESULT (vec_stmt);
1273 else if (is_gimple_call (vec_stmt))
1274 vec_oprnd = gimple_call_lhs (vec_stmt);
1275 else
1276 vec_oprnd = gimple_assign_lhs (vec_stmt);
1277 return vec_oprnd;
1278 }
1279
1280 /* Case 4: operand is defined by a loop header phi - reduction */
1281 case vect_reduction_def:
1282 case vect_double_reduction_def:
1283 case vect_nested_cycle:
1284 {
1285 struct loop *loop;
1286
1287 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1288 loop = (gimple_bb (def_stmt))->loop_father;
1289
1290 /* Get the def before the loop */
1291 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1292 return get_initial_def_for_reduction (stmt, op, scalar_def);
1293 }
1294
1295 /* Case 5: operand is defined by loop-header phi - induction. */
1296 case vect_induction_def:
1297 {
1298 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1299
1300 /* Get the def from the vectorized stmt. */
1301 def_stmt_info = vinfo_for_stmt (def_stmt);
1302 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1303 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1304 vec_oprnd = PHI_RESULT (vec_stmt);
1305 else
1306 vec_oprnd = gimple_get_lhs (vec_stmt);
1307 return vec_oprnd;
1308 }
1309
1310 default:
1311 gcc_unreachable ();
1312 }
1313}
1314
1315
1316/* Function vect_get_vec_def_for_stmt_copy
1317
1318 Return a vector-def for an operand. This function is used when the
1319 vectorized stmt to be created (by the caller to this function) is a "copy"
1320 created in case the vectorized result cannot fit in one vector, and several
1321 copies of the vector-stmt are required. In this case the vector-def is
1322 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1323 of the stmt that defines VEC_OPRND.
1324 DT is the type of the vector def VEC_OPRND.
1325
1326 Context:
1327 In case the vectorization factor (VF) is bigger than the number
1328 of elements that can fit in a vectype (nunits), we have to generate
1329 more than one vector stmt to vectorize the scalar stmt. This situation
1330 arises when there are multiple data-types operated upon in the loop; the
1331 smallest data-type determines the VF, and as a result, when vectorizing
1332 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1333 vector stmt (each computing a vector of 'nunits' results, and together
1334 computing 'VF' results in each iteration). This function is called when
1335 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1336 which VF=16 and nunits=4, so the number of copies required is 4):
1337
1338 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1339
1340 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1341 VS1.1: vx.1 = memref1 VS1.2
1342 VS1.2: vx.2 = memref2 VS1.3
1343 VS1.3: vx.3 = memref3
1344
1345 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1346 VSnew.1: vz1 = vx.1 + ... VSnew.2
1347 VSnew.2: vz2 = vx.2 + ... VSnew.3
1348 VSnew.3: vz3 = vx.3 + ...
1349
1350 The vectorization of S1 is explained in vectorizable_load.
1351 The vectorization of S2:
1352 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1353 the function 'vect_get_vec_def_for_operand' is called to
1354 get the relevant vector-def for each operand of S2. For operand x it
1355 returns the vector-def 'vx.0'.
1356
1357 To create the remaining copies of the vector-stmt (VSnew.j), this
1358 function is called to get the relevant vector-def for each operand. It is
1359 obtained from the respective VS1.j stmt, which is recorded in the
1360 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1361
1362 For example, to obtain the vector-def 'vx.1' in order to create the
1363 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1364 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1365 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1366 and return its def ('vx.1').
1367 Overall, to create the above sequence this function will be called 3 times:
1368 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1369 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1370 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1371
1372tree
1373vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1374{
1375 gimple vec_stmt_for_operand;
1376 stmt_vec_info def_stmt_info;
1377
1378 /* Do nothing; can reuse same def. */
1379 if (dt == vect_external_def || dt == vect_constant_def )
1380 return vec_oprnd;
1381
1382 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1383 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1384 gcc_assert (def_stmt_info);
1385 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1386 gcc_assert (vec_stmt_for_operand);
1387 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1388 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1389 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1390 else
1391 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1392 return vec_oprnd;
1393}
1394
1395
1396/* Get vectorized definitions for the operands to create a copy of an original
1397 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1398
1399static void
1400vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1401 VEC(tree,heap) **vec_oprnds0,
1402 VEC(tree,heap) **vec_oprnds1)
1403{
1404 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1405
1406 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1407 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1408
1409 if (vec_oprnds1 && *vec_oprnds1)
1410 {
1411 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1412 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1413 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1414 }
1415}
1416
1417
1418/* Get vectorized definitions for OP0 and OP1.
1419 REDUC_INDEX is the index of reduction operand in case of reduction,
1420 and -1 otherwise. */
1421
1422void
1423vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1424 VEC (tree, heap) **vec_oprnds0,
1425 VEC (tree, heap) **vec_oprnds1,
1426 slp_tree slp_node, int reduc_index)
1427{
1428 if (slp_node)
1429 {
1430 int nops = (op1 == NULL_TREE) ? 1 : 2;
1431 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1432 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1433
1434 VEC_quick_push (tree, ops, op0);
1435 if (op1)
1436 VEC_quick_push (tree, ops, op1);
1437
1438 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1439
1440 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1441 if (op1)
1442 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1443
1444 VEC_free (tree, heap, ops);
1445 VEC_free (slp_void_p, heap, vec_defs);
1446 }
1447 else
1448 {
1449 tree vec_oprnd;
1450
1451 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1452 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1453 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1454
1455 if (op1)
1456 {
1457 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1458 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1459 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1460 }
1461 }
1462}
1463
1464
1465/* Function vect_finish_stmt_generation.
1466
1467 Insert a new stmt. */
1468
1469void
1470vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1471 gimple_stmt_iterator *gsi)
1472{
1473 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1474 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1475 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1476
1477 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1478
1479 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1480
1481 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1482 bb_vinfo));
1483
1484 if (vect_print_dump_info (REPORT_DETAILS))
1485 {
1486 fprintf (vect_dump, "add new stmt: ");
1487 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1488 }
1489
1490 gimple_set_location (vec_stmt, gimple_location (stmt));
1491}
1492
1493/* Checks if CALL can be vectorized in type VECTYPE. Returns
1494 a function declaration if the target has a vectorized version
1495 of the function, or NULL_TREE if the function cannot be vectorized. */
1496
1497tree
1498vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1499{
1500 tree fndecl = gimple_call_fndecl (call);
1501
1502 /* We only handle functions that do not read or clobber memory -- i.e.
1503 const or novops ones. */
1504 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1505 return NULL_TREE;
1506
1507 if (!fndecl
1508 || TREE_CODE (fndecl) != FUNCTION_DECL
1509 || !DECL_BUILT_IN (fndecl))
1510 return NULL_TREE;
1511
1512 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1513 vectype_in);
1514}
1515
1516/* Function vectorizable_call.
1517
1518 Check if STMT performs a function call that can be vectorized.
1519 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1520 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1521 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1522
1523static bool
1524vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1525 slp_tree slp_node)
1526{
1527 tree vec_dest;
1528 tree scalar_dest;
1529 tree op, type;
1530 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1532 tree vectype_out, vectype_in;
1533 int nunits_in;
1534 int nunits_out;
1535 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1536 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1537 tree fndecl, new_temp, def, rhs_type;
1538 gimple def_stmt;
1539 enum vect_def_type dt[3]
1540 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1541 gimple new_stmt = NULL;
1542 int ncopies, j;
1543 VEC(tree, heap) *vargs = NULL;
1544 enum { NARROW, NONE, WIDEN } modifier;
1545 size_t i, nargs;
1546 tree lhs;
1547
1548 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1549 return false;
1550
1551 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1552 return false;
1553
1554 /* Is STMT a vectorizable call? */
1555 if (!is_gimple_call (stmt))
1556 return false;
1557
1558 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1559 return false;
1560
1561 if (stmt_can_throw_internal (stmt))
1562 return false;
1563
1564 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1565
1566 /* Process function arguments. */
1567 rhs_type = NULL_TREE;
1568 vectype_in = NULL_TREE;
1569 nargs = gimple_call_num_args (stmt);
1570
1571 /* Bail out if the function has more than three arguments; we do not have
1572 interesting builtin functions to vectorize with more than two arguments
1573 except for fma. No arguments is also not good. */
1574 if (nargs == 0 || nargs > 3)
1575 return false;
1576
1577 for (i = 0; i < nargs; i++)
1578 {
1579 tree opvectype;
1580
ebfd146a
IR
1581 op = gimple_call_arg (stmt, i);
1582
1583 /* We can only handle calls with arguments of the same type. */
1584 if (rhs_type
1585 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1586 {
1587 if (vect_print_dump_info (REPORT_DETAILS))
1588 fprintf (vect_dump, "argument types differ.");
1589 return false;
1590 }
1591 if (!rhs_type)
1592 rhs_type = TREE_TYPE (op);
1593
1594 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1595 &def_stmt, &def, &dt[i], &opvectype))
1596 {
1597 if (vect_print_dump_info (REPORT_DETAILS))
1598 fprintf (vect_dump, "use not simple.");
1599 return false;
1600 }
1601
1602 if (!vectype_in)
1603 vectype_in = opvectype;
1604 else if (opvectype
1605 && opvectype != vectype_in)
1606 {
1607 if (vect_print_dump_info (REPORT_DETAILS))
1608 fprintf (vect_dump, "argument vector types differ.");
1609 return false;
1610 }
1611 }
1612 /* If all arguments are external or constant defs use a vector type with
1613 the same size as the output vector type. */
1614 if (!vectype_in)
1615 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1616 if (vec_stmt)
1617 gcc_assert (vectype_in);
1618 if (!vectype_in)
1619 {
1620 if (vect_print_dump_info (REPORT_DETAILS))
1621 {
1622 fprintf (vect_dump, "no vectype for scalar type ");
1623 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1624 }
1625
1626 return false;
1627 }
1628
1629 /* FORNOW */
1630 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1632 if (nunits_in == nunits_out / 2)
1633 modifier = NARROW;
1634 else if (nunits_out == nunits_in)
1635 modifier = NONE;
1636 else if (nunits_out == nunits_in / 2)
1637 modifier = WIDEN;
1638 else
1639 return false;
1640
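  /* For example, a call taking V8HI arguments (nunits_in == 8) that produces
     a V4SI result (nunits_out == 4) gets modifier == WIDEN, since each
     result element is twice as wide as an argument element; the reverse
     combination would get NARROW, and equal unit counts mean NONE.  */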
1641 /* For now, we only vectorize functions if a target specific builtin
1642 is available. TODO -- in some cases, it might be profitable to
1643 insert the calls for pieces of the vector, in order to be able
1644 to vectorize other operations in the loop. */
1645 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1646 if (fndecl == NULL_TREE)
1647 {
1648 if (vect_print_dump_info (REPORT_DETAILS))
1649 fprintf (vect_dump, "function is not vectorizable.");
1650
1651 return false;
1652 }
1653
5006671f 1654 gcc_assert (!gimple_vuse (stmt));
ebfd146a 1655
190c2236
JJ
1656 if (slp_node || PURE_SLP_STMT (stmt_info))
1657 ncopies = 1;
1658 else if (modifier == NARROW)
ebfd146a
IR
1659 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1660 else
1661 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1662
1663 /* Sanity check: make sure that at least one copy of the vectorized stmt
1664 needs to be generated. */
1665 gcc_assert (ncopies >= 1);
1666
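 /* Worked example (editor's note, not in the original source): with a
    vectorization factor of 8 and 4 elements per input vector, the NONE and
    WIDEN cases need 8 / nunits_in = 2 copies of the vector call; the NARROW
    case divides by nunits_out instead, since each narrowed call already
    consumes two input vectors per output vector. */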
1667 if (!vec_stmt) /* transformation not required. */
1668 {
1669 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "=== vectorizable_call ===");
1672 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1673 return true;
1674 }
1675
1676 /** Transform. **/
1677
1678 if (vect_print_dump_info (REPORT_DETAILS))
9d5e7640 1679 fprintf (vect_dump, "transform call.");
ebfd146a
IR
1680
1681 /* Handle def. */
1682 scalar_dest = gimple_call_lhs (stmt);
1683 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1684
1685 prev_stmt_info = NULL;
1686 switch (modifier)
1687 {
1688 case NONE:
1689 for (j = 0; j < ncopies; ++j)
1690 {
1691 /* Build argument list for the vectorized call. */
1692 if (j == 0)
1693 vargs = VEC_alloc (tree, heap, nargs);
1694 else
1695 VEC_truncate (tree, vargs, 0);
1696
190c2236
JJ
1697 if (slp_node)
1698 {
1699 VEC (slp_void_p, heap) *vec_defs
1700 = VEC_alloc (slp_void_p, heap, nargs);
1701 VEC (tree, heap) *vec_oprnds0;
1702
1703 for (i = 0; i < nargs; i++)
1704 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1705 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1706 vec_oprnds0
1707 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1708
1709 /* Arguments are ready. Create the new vector stmt. */
1710 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1711 {
1712 size_t k;
1713 for (k = 0; k < nargs; k++)
1714 {
1715 VEC (tree, heap) *vec_oprndsk
1716 = (VEC (tree, heap) *)
1717 VEC_index (slp_void_p, vec_defs, k);
1718 VEC_replace (tree, vargs, k,
1719 VEC_index (tree, vec_oprndsk, i));
1720 }
1721 new_stmt = gimple_build_call_vec (fndecl, vargs);
1722 new_temp = make_ssa_name (vec_dest, new_stmt);
1723 gimple_call_set_lhs (new_stmt, new_temp);
1724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1725 mark_symbols_for_renaming (new_stmt);
1726 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1727 new_stmt);
1728 }
1729
1730 for (i = 0; i < nargs; i++)
1731 {
1732 VEC (tree, heap) *vec_oprndsi
1733 = (VEC (tree, heap) *)
1734 VEC_index (slp_void_p, vec_defs, i);
1735 VEC_free (tree, heap, vec_oprndsi);
1736 }
1737 VEC_free (slp_void_p, heap, vec_defs);
1738 continue;
1739 }
1740
ebfd146a
IR
1741 for (i = 0; i < nargs; i++)
1742 {
1743 op = gimple_call_arg (stmt, i);
1744 if (j == 0)
1745 vec_oprnd0
1746 = vect_get_vec_def_for_operand (op, stmt, NULL);
1747 else
63827fb8
IR
1748 {
1749 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1750 vec_oprnd0
1751 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1752 }
ebfd146a
IR
1753
1754 VEC_quick_push (tree, vargs, vec_oprnd0);
1755 }
1756
1757 new_stmt = gimple_build_call_vec (fndecl, vargs);
1758 new_temp = make_ssa_name (vec_dest, new_stmt);
1759 gimple_call_set_lhs (new_stmt, new_temp);
1760
1761 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7411b8f0 1762 mark_symbols_for_renaming (new_stmt);
ebfd146a
IR
1763
1764 if (j == 0)
1765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1766 else
1767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1768
1769 prev_stmt_info = vinfo_for_stmt (new_stmt);
1770 }
1771
1772 break;
1773
1774 case NARROW:
1775 for (j = 0; j < ncopies; ++j)
1776 {
1777 /* Build argument list for the vectorized call. */
1778 if (j == 0)
1779 vargs = VEC_alloc (tree, heap, nargs * 2);
1780 else
1781 VEC_truncate (tree, vargs, 0);
1782
190c2236
JJ
1783 if (slp_node)
1784 {
1785 VEC (slp_void_p, heap) *vec_defs
1786 = VEC_alloc (slp_void_p, heap, nargs);
1787 VEC (tree, heap) *vec_oprnds0;
1788
1789 for (i = 0; i < nargs; i++)
1790 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1791 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1792 vec_oprnds0
1793 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1794
1795 /* Arguments are ready. Create the new vector stmt. */
1796 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1797 i += 2)
1798 {
1799 size_t k;
1800 VEC_truncate (tree, vargs, 0);
1801 for (k = 0; k < nargs; k++)
1802 {
1803 VEC (tree, heap) *vec_oprndsk
1804 = (VEC (tree, heap) *)
1805 VEC_index (slp_void_p, vec_defs, k);
1806 VEC_quick_push (tree, vargs,
1807 VEC_index (tree, vec_oprndsk, i));
1808 VEC_quick_push (tree, vargs,
1809 VEC_index (tree, vec_oprndsk, i + 1));
1810 }
1811 new_stmt = gimple_build_call_vec (fndecl, vargs);
1812 new_temp = make_ssa_name (vec_dest, new_stmt);
1813 gimple_call_set_lhs (new_stmt, new_temp);
1814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1815 mark_symbols_for_renaming (new_stmt);
1816 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1817 new_stmt);
1818 }
1819
1820 for (i = 0; i < nargs; i++)
1821 {
1822 VEC (tree, heap) *vec_oprndsi
1823 = (VEC (tree, heap) *)
1824 VEC_index (slp_void_p, vec_defs, i);
1825 VEC_free (tree, heap, vec_oprndsi);
1826 }
1827 VEC_free (slp_void_p, heap, vec_defs);
1828 continue;
1829 }
1830
ebfd146a
IR
1831 for (i = 0; i < nargs; i++)
1832 {
1833 op = gimple_call_arg (stmt, i);
1834 if (j == 0)
1835 {
1836 vec_oprnd0
1837 = vect_get_vec_def_for_operand (op, stmt, NULL);
1838 vec_oprnd1
63827fb8 1839 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
1840 }
1841 else
1842 {
336ecb65 1843 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 1844 vec_oprnd0
63827fb8 1845 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 1846 vec_oprnd1
63827fb8 1847 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
1848 }
1849
1850 VEC_quick_push (tree, vargs, vec_oprnd0);
1851 VEC_quick_push (tree, vargs, vec_oprnd1);
1852 }
1853
1854 new_stmt = gimple_build_call_vec (fndecl, vargs);
1855 new_temp = make_ssa_name (vec_dest, new_stmt);
1856 gimple_call_set_lhs (new_stmt, new_temp);
1857
1858 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7411b8f0 1859 mark_symbols_for_renaming (new_stmt);
ebfd146a
IR
1860
1861 if (j == 0)
1862 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1863 else
1864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1865
1866 prev_stmt_info = vinfo_for_stmt (new_stmt);
1867 }
1868
1869 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1870
1871 break;
1872
1873 case WIDEN:
1874 /* No current target implements this case. */
1875 return false;
1876 }
1877
1878 VEC_free (tree, heap, vargs);
1879
1880 /* Update the exception handling table with the vector stmt if necessary. */
1881 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1882 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1883
1884 /* The call in STMT might prevent it from being removed in dce.
1885 We however cannot remove it here, due to the way the ssa name
1886 it defines is mapped to the new definition. So just replace the
1887 rhs of the statement with something harmless. */
1888
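 /* Illustration (editor's note, not in the original source): if the scalar
    statement was  x_1 = foo (a_2);  it is rewritten further below as
    x_1 = 0.0  (the zero constant of x_1's type), which keeps the SSA name
    defined while leaving the dead call trivially removable. */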
dd34c087
JJ
1889 if (slp_node)
1890 return true;
1891
ebfd146a 1892 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
1893 if (is_pattern_stmt_p (stmt_info))
1894 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1895 else
1896 lhs = gimple_call_lhs (stmt);
1897 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 1898 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 1899 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
1900 STMT_VINFO_STMT (stmt_info) = new_stmt;
1901 gsi_replace (gsi, new_stmt, false);
1902 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1903
1904 return true;
1905}
1906
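/* Editor's illustrative sketch -- not part of tree-vect-stmts.c. It shows,
   at the source level, the effect of the NONE-modifier path of
   vectorizable_call above: a loop of scalar calls becomes one call on a
   whole vector per group of lanes. The vec4f type and the vec4_copysignf
   helper are invented stand-ins for whatever vector type and target builtin
   vectorizable_function () would actually pick. */

typedef float vec4f __attribute__ ((vector_size (16)));

/* Hypothetical stand-in for a target-specific vector builtin. */
static vec4f
vec4_copysignf (vec4f x, vec4f y)
{
  vec4f r;
  for (int i = 0; i < 4; i++)
    r[i] = __builtin_copysignf (x[i], y[i]);
  return r;
}

/* Before vectorization: one scalar call per iteration. */
static void
scalar_loop (float *a, float *b, float *c, int n)
{
  for (int i = 0; i < n; i++)
    c[i] = __builtin_copysignf (a[i], b[i]);
}

/* After vectorization: one vector call per four iterations (n is assumed
   to be a multiple of 4 and the pointers suitably aligned). */
static void
vector_loop (float *a, float *b, float *c, int n)
{
  for (int i = 0; i < n; i += 4)
    {
      vec4f va = *(vec4f *) (a + i);
      vec4f vb = *(vec4f *) (b + i);
      *(vec4f *) (c + i) = vec4_copysignf (va, vb);
    }
}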
1907
1908/* Function vect_gen_widened_results_half
1909
1910 Create a vector stmt whose code, number of operands, and result
b8698a0f 1911 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
ff802fa1 1912 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
1913 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1914 needs to be created (DECL is a function-decl of a target-builtin).
1915 STMT is the original scalar stmt that we are vectorizing. */
1916
1917static gimple
1918vect_gen_widened_results_half (enum tree_code code,
1919 tree decl,
1920 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1921 tree vec_dest, gimple_stmt_iterator *gsi,
1922 gimple stmt)
b8698a0f 1923{
ebfd146a 1924 gimple new_stmt;
b8698a0f
L
1925 tree new_temp;
1926
1927 /* Generate half of the widened result: */
1928 if (code == CALL_EXPR)
1929 {
1930 /* Target specific support */
ebfd146a
IR
1931 if (op_type == binary_op)
1932 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1933 else
1934 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1935 new_temp = make_ssa_name (vec_dest, new_stmt);
1936 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
1937 }
1938 else
ebfd146a 1939 {
b8698a0f
L
1940 /* Generic support */
1941 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
1942 if (op_type != binary_op)
1943 vec_oprnd1 = NULL;
1944 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1945 vec_oprnd1);
1946 new_temp = make_ssa_name (vec_dest, new_stmt);
1947 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 1948 }
ebfd146a
IR
1949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1950
ebfd146a
IR
1951 return new_stmt;
1952}
1953
4a00c761
JJ
1954
1955/* Get vectorized definitions for loop-based vectorization. For the first
1956 operand we call vect_get_vec_def_for_operand() (with OPRND containing
1957 the scalar operand), and for the rest we get a copy with
1958 vect_get_vec_def_for_stmt_copy() using the previous vector definition
1959 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
1960 The vectors are collected into VEC_OPRNDS. */
1961
1962static void
1963vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
1964 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
1965{
1966 tree vec_oprnd;
1967
1968 /* Get first vector operand. */
1969 /* All the vector operands except the very first one (that is, the scalar operand)
1970 are stmt copies. */
1971 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
1972 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
1973 else
1974 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
1975
1976 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1977
1978 /* Get second vector operand. */
1979 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
1980 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1981
1982 *oprnd = vec_oprnd;
1983
1984 /* For conversion in multiple steps, continue to get operands
1985 recursively. */
1986 if (multi_step_cvt)
1987 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
1988}
1989
1990
1991/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
1992 For multi-step conversions store the resulting vectors and call the function
1993 recursively. */
1994
1995static void
1996vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
1997 int multi_step_cvt, gimple stmt,
1998 VEC (tree, heap) *vec_dsts,
1999 gimple_stmt_iterator *gsi,
2000 slp_tree slp_node, enum tree_code code,
2001 stmt_vec_info *prev_stmt_info)
2002{
2003 unsigned int i;
2004 tree vop0, vop1, new_tmp, vec_dest;
2005 gimple new_stmt;
2006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2007
2008 vec_dest = VEC_pop (tree, vec_dsts);
2009
2010 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2011 {
2012 /* Create demotion operation. */
2013 vop0 = VEC_index (tree, *vec_oprnds, i);
2014 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2015 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2016 new_tmp = make_ssa_name (vec_dest, new_stmt);
2017 gimple_assign_set_lhs (new_stmt, new_tmp);
2018 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2019
2020 if (multi_step_cvt)
2021 /* Store the resulting vector for next recursive call. */
2022 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2023 else
2024 {
2025 /* This is the last step of the conversion sequence. Store the
2026 vectors in SLP_NODE or in vector info of the scalar statement
2027 (or in STMT_VINFO_RELATED_STMT chain). */
2028 if (slp_node)
2029 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2030 else
2031 {
2032 if (!*prev_stmt_info)
2033 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2034 else
2035 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2036
2037 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2038 }
2039 }
2040 }
2041
2042 /* For multi-step demotion operations we first generate demotion operations
2043 from the source type to the intermediate types, and then combine the
2044 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2045 type. */
2046 if (multi_step_cvt)
2047 {
2048 /* At each level of recursion we have half of the operands we had at the
2049 previous level. */
2050 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2051 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2052 stmt, vec_dsts, gsi, slp_node,
2053 VEC_PACK_TRUNC_EXPR,
2054 prev_stmt_info);
2055 }
2056
2057 VEC_quick_push (tree, vec_dsts, vec_dest);
2058}
2059
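/* Editor's illustrative sketch -- not part of tree-vect-stmts.c. One level
   of the demotion recursion above: each pair of wider vectors is packed into
   a single narrower vector, which is why the loop walks VEC_OPRNDS two
   elements at a time and each recursion level is left with half as many
   operands. The types are assumptions for a 128-bit vector size, and the
   lane order shown is just one possibility (the real VEC_PACK_TRUNC_EXPR
   ordering is target dependent). */

typedef int   vec4i __attribute__ ((vector_size (16)));
typedef short vec8s __attribute__ ((vector_size (16)));

static vec8s
demote_pair (vec4i lo, vec4i hi)
{
  vec8s r;
  int i;

  for (i = 0; i < 4; i++)
    {
      r[i] = (short) lo[i];      /* first operand fills lanes 0..3  */
      r[i + 4] = (short) hi[i];  /* second operand fills lanes 4..7 */
    }
  return r;
}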
2060
2061/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2062 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2063 the resulting vectors and call the function recursively. */
2064
2065static void
2066vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2067 VEC (tree, heap) **vec_oprnds1,
2068 gimple stmt, tree vec_dest,
2069 gimple_stmt_iterator *gsi,
2070 enum tree_code code1,
2071 enum tree_code code2, tree decl1,
2072 tree decl2, int op_type)
2073{
2074 int i;
2075 tree vop0, vop1, new_tmp1, new_tmp2;
2076 gimple new_stmt1, new_stmt2;
2077 VEC (tree, heap) *vec_tmp = NULL;
2078
2079 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2080 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2081 {
2082 if (op_type == binary_op)
2083 vop1 = VEC_index (tree, *vec_oprnds1, i);
2084 else
2085 vop1 = NULL_TREE;
2086
2087 /* Generate the two halves of promotion operation. */
2088 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2089 op_type, vec_dest, gsi, stmt);
2090 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2091 op_type, vec_dest, gsi, stmt);
2092 if (is_gimple_call (new_stmt1))
2093 {
2094 new_tmp1 = gimple_call_lhs (new_stmt1);
2095 new_tmp2 = gimple_call_lhs (new_stmt2);
2096 }
2097 else
2098 {
2099 new_tmp1 = gimple_assign_lhs (new_stmt1);
2100 new_tmp2 = gimple_assign_lhs (new_stmt2);
2101 }
2102
2103 /* Store the results for the next step. */
2104 VEC_quick_push (tree, vec_tmp, new_tmp1);
2105 VEC_quick_push (tree, vec_tmp, new_tmp2);
2106 }
2107
2108 VEC_free (tree, heap, *vec_oprnds0);
2109 *vec_oprnds0 = vec_tmp;
2110}
2111
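/* Editor's illustrative sketch -- not part of tree-vect-stmts.c. The shape
   of the two "halves" the promotion helper above produces for every input
   vector: one 8 x short vector yields two 4 x int vectors, corresponding to
   the CODE1/CODE2 (or DECL1/DECL2) low-part and high-part operations. The
   types are assumptions for a 128-bit vector size, and which lanes end up in
   which half is target dependent. */

typedef short vec8s __attribute__ ((vector_size (16)));
typedef int   vec4i __attribute__ ((vector_size (16)));

static void
promote_halves (vec8s v, vec4i *out_lo, vec4i *out_hi)
{
  vec4i lo, hi;
  int i;

  for (i = 0; i < 4; i++)
    {
      lo[i] = (int) v[i];       /* "low" half: lanes 0..3  */
      hi[i] = (int) v[i + 4];   /* "high" half: lanes 4..7 */
    }
  *out_lo = lo;
  *out_hi = hi;
}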
2112
b8698a0f
L
2113/* Check if STMT performs a conversion operation that can be vectorized.
2114 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 2115 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
2116 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2117
2118static bool
2119vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2120 gimple *vec_stmt, slp_tree slp_node)
2121{
2122 tree vec_dest;
2123 tree scalar_dest;
4a00c761 2124 tree op0, op1 = NULL_TREE;
ebfd146a
IR
2125 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2128 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 2129 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
2130 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2131 tree new_temp;
2132 tree def;
2133 gimple def_stmt;
2134 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2135 gimple new_stmt = NULL;
2136 stmt_vec_info prev_stmt_info;
2137 int nunits_in;
2138 int nunits_out;
2139 tree vectype_out, vectype_in;
4a00c761
JJ
2140 int ncopies, i, j;
2141 tree lhs_type, rhs_type;
ebfd146a 2142 enum { NARROW, NONE, WIDEN } modifier;
4a00c761 2143 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
ebfd146a 2144 tree vop0;
4a00c761
JJ
2145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2146 int multi_step_cvt = 0;
2147 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2148 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2149 int op_type;
2150 enum machine_mode rhs_mode;
2151 unsigned short fltsz;
ebfd146a
IR
2152
2153 /* Is STMT a vectorizable conversion? */
2154
4a00c761 2155 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2156 return false;
2157
8644a673 2158 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2159 return false;
2160
2161 if (!is_gimple_assign (stmt))
2162 return false;
2163
2164 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2165 return false;
2166
2167 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
2168 if (!CONVERT_EXPR_CODE_P (code)
2169 && code != FIX_TRUNC_EXPR
2170 && code != FLOAT_EXPR
2171 && code != WIDEN_MULT_EXPR
2172 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
2173 return false;
2174
4a00c761
JJ
2175 op_type = TREE_CODE_LENGTH (code);
2176
ebfd146a 2177 /* Check types of lhs and rhs. */
b690cc0f 2178 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 2179 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
2180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2181
ebfd146a
IR
2182 op0 = gimple_assign_rhs1 (stmt);
2183 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
2184
2185 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2186 && !((INTEGRAL_TYPE_P (lhs_type)
2187 && INTEGRAL_TYPE_P (rhs_type))
2188 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2189 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2190 return false;
2191
2192 if ((INTEGRAL_TYPE_P (lhs_type)
2193 && (TYPE_PRECISION (lhs_type)
2194 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2195 || (INTEGRAL_TYPE_P (rhs_type)
2196 && (TYPE_PRECISION (rhs_type)
2197 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2198 {
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump,
2201 "type conversion to/from bit-precision unsupported.");
2202 return false;
2203 }
2204
b690cc0f 2205 /* Check the operands of the operation. */
4a00c761 2206 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
b690cc0f
RG
2207 &def_stmt, &def, &dt[0], &vectype_in))
2208 {
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "use not simple.");
2211 return false;
2212 }
4a00c761
JJ
2213 if (op_type == binary_op)
2214 {
2215 bool ok;
2216
2217 op1 = gimple_assign_rhs2 (stmt);
2218 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2219 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2220 OP1. */
2221 if (CONSTANT_CLASS_P (op0))
2222 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
2223 &def_stmt, &def, &dt[1], &vectype_in);
2224 else
2225 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
2226 &dt[1]);
2227
2228 if (!ok)
2229 {
2230 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "use not simple.");
2232 return false;
2233 }
2234 }
2235
b690cc0f
RG
2236 /* If op0 is an external or constant def, use a vector type of
2237 the same size as the output vector type. */
ebfd146a 2238 if (!vectype_in)
b690cc0f 2239 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2240 if (vec_stmt)
2241 gcc_assert (vectype_in);
2242 if (!vectype_in)
2243 {
2244 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761
JJ
2245 {
2246 fprintf (vect_dump, "no vectype for scalar type ");
2247 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2248 }
7d8930a0
IR
2249
2250 return false;
2251 }
ebfd146a 2252
b690cc0f
RG
2253 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2254 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 2255 if (nunits_in < nunits_out)
ebfd146a
IR
2256 modifier = NARROW;
2257 else if (nunits_out == nunits_in)
2258 modifier = NONE;
ebfd146a 2259 else
4a00c761 2260 modifier = WIDEN;
ebfd146a 2261
ff802fa1
IR
2262 /* Multiple types in SLP are handled by creating the appropriate number of
2263 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2264 case of SLP. */
437f4a00 2265 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 2266 ncopies = 1;
4a00c761
JJ
2267 else if (modifier == NARROW)
2268 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2269 else
2270 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 2271
ebfd146a
IR
2272 /* Sanity check: make sure that at least one copy of the vectorized stmt
2273 needs to be generated. */
2274 gcc_assert (ncopies >= 1);
2275
ebfd146a 2276 /* Supportable by target? */
4a00c761 2277 switch (modifier)
ebfd146a 2278 {
4a00c761
JJ
2279 case NONE:
2280 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2281 return false;
2282 if (supportable_convert_operation (code, vectype_out, vectype_in,
2283 &decl1, &code1))
2284 break;
2285 /* FALLTHRU */
2286 unsupported:
ebfd146a 2287 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761 2288 fprintf (vect_dump, "conversion not supported by target.");
ebfd146a 2289 return false;
ebfd146a 2290
4a00c761
JJ
2291 case WIDEN:
2292 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2293 &decl1, &decl2, &code1, &code2,
2294 &multi_step_cvt, &interm_types))
2295 {
2296 /* Binary widening operation can only be supported directly by the
2297 architecture. */
2298 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2299 break;
2300 }
2301
2302 if (code != FLOAT_EXPR
2303 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2304 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2305 goto unsupported;
2306
2307 rhs_mode = TYPE_MODE (rhs_type);
2308 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2309 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2310 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2311 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2312 {
2313 cvt_type
2314 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2315 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2316 if (cvt_type == NULL_TREE)
2317 goto unsupported;
2318
2319 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2320 {
2321 if (!supportable_convert_operation (code, vectype_out,
2322 cvt_type, &decl1, &codecvt1))
2323 goto unsupported;
2324 }
2325 else if (!supportable_widening_operation (code, stmt, vectype_out,
2326 cvt_type, &decl1, &decl2,
2327 &codecvt1, &codecvt2,
2328 &multi_step_cvt,
2329 &interm_types))
2330 continue;
2331 else
2332 gcc_assert (multi_step_cvt == 0);
2333
2334 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2335 vectype_in, NULL, NULL, &code1,
2336 &code2, &multi_step_cvt,
2337 &interm_types))
2338 break;
2339 }
2340
2341 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2342 goto unsupported;
2343
2344 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2345 codecvt2 = ERROR_MARK;
2346 else
2347 {
2348 multi_step_cvt++;
2349 VEC_safe_push (tree, heap, interm_types, cvt_type);
2350 cvt_type = NULL_TREE;
2351 }
2352 break;
2353
2354 case NARROW:
2355 gcc_assert (op_type == unary_op);
2356 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2357 &code1, &multi_step_cvt,
2358 &interm_types))
2359 break;
2360
2361 if (code != FIX_TRUNC_EXPR
2362 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2363 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2364 goto unsupported;
2365
2366 rhs_mode = TYPE_MODE (rhs_type);
2367 cvt_type
2368 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2369 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2370 if (cvt_type == NULL_TREE)
2371 goto unsupported;
2372 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2373 &decl1, &codecvt1))
2374 goto unsupported;
2375 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2376 &code1, &multi_step_cvt,
2377 &interm_types))
2378 break;
2379 goto unsupported;
2380
2381 default:
2382 gcc_unreachable ();
ebfd146a
IR
2383 }
2384
2385 if (!vec_stmt) /* transformation not required. */
2386 {
4a00c761
JJ
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "=== vectorizable_conversion ===");
2389 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2390 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2391 else if (modifier == NARROW)
2392 {
2393 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2394 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2395 }
2396 else
2397 {
2398 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2399 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
2400 }
2401 VEC_free (tree, heap, interm_types);
ebfd146a
IR
2402 return true;
2403 }
2404
2405 /** Transform. **/
2406 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761 2407 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
ebfd146a 2408
4a00c761
JJ
2409 if (op_type == binary_op)
2410 {
2411 if (CONSTANT_CLASS_P (op0))
2412 op0 = fold_convert (TREE_TYPE (op1), op0);
2413 else if (CONSTANT_CLASS_P (op1))
2414 op1 = fold_convert (TREE_TYPE (op0), op1);
2415 }
2416
2417 /* In case of multi-step conversion, we first generate conversion operations
2418 to the intermediate types, and then from those types to the final one.
2419 We create vector destinations for the intermediate type (TYPES) received
2420 from supportable_*_operation, and store them in the correct order
2421 for future use in vect_create_vectorized_*_stmts (). */
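 /* Illustration (editor's note, not in the original source): a
    short -> double widening conversion might be performed as short -> int
    (a NOP_EXPR widening step recorded in INTERM_TYPES) followed by
    int -> double (a FLOAT_EXPR step through CVT_TYPE). */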
2422 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
ebfd146a 2423 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4a00c761
JJ
2424 VEC_quick_push (tree, vec_dsts, vec_dest);
2425
2426 if (multi_step_cvt)
2427 {
2428 for (i = VEC_length (tree, interm_types) - 1;
2429 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2430 {
2431 vec_dest = vect_create_destination_var (scalar_dest,
2432 intermediate_type);
2433 VEC_quick_push (tree, vec_dsts, vec_dest);
2434 }
2435 }
ebfd146a 2436
4a00c761
JJ
2437 if (cvt_type)
2438 vec_dest = vect_create_destination_var (scalar_dest, cvt_type);
2439
2440 if (!slp_node)
2441 {
2442 if (modifier == NONE)
2443 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2444 else if (modifier == WIDEN)
2445 {
2446 vec_oprnds0 = VEC_alloc (tree, heap,
2447 (multi_step_cvt
2448 ? vect_pow2 (multi_step_cvt) : 1));
2449 if (op_type == binary_op)
2450 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2451 }
2452 else
2453 vec_oprnds0 = VEC_alloc (tree, heap,
2454 2 * (multi_step_cvt
2455 ? vect_pow2 (multi_step_cvt) : 1));
2456 }
2457 else if (code == WIDEN_LSHIFT_EXPR)
2458 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
ebfd146a 2459
4a00c761 2460 last_oprnd = op0;
ebfd146a
IR
2461 prev_stmt_info = NULL;
2462 switch (modifier)
2463 {
2464 case NONE:
2465 for (j = 0; j < ncopies; j++)
2466 {
ebfd146a 2467 if (j == 0)
d092494c
IR
2468 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2469 -1);
ebfd146a
IR
2470 else
2471 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2472
ac47786e 2473 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
4a00c761
JJ
2474 {
2475 /* Arguments are ready. Create the new vector stmt. */
2476 if (code1 == CALL_EXPR)
2477 {
2478 new_stmt = gimple_build_call (decl1, 1, vop0);
2479 new_temp = make_ssa_name (vec_dest, new_stmt);
2480 gimple_call_set_lhs (new_stmt, new_temp);
2481 }
2482 else
2483 {
2484 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2485 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2486 vop0, NULL);
2487 new_temp = make_ssa_name (vec_dest, new_stmt);
2488 gimple_assign_set_lhs (new_stmt, new_temp);
2489 }
2490
2491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2492 if (slp_node)
2493 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2494 new_stmt);
2495 }
2496
ebfd146a
IR
2497 if (j == 0)
2498 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2499 else
2500 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2501 prev_stmt_info = vinfo_for_stmt (new_stmt);
2502 }
2503 break;
2504
2505 case WIDEN:
2506 /* In case the vectorization factor (VF) is bigger than the number
2507 of elements that we can fit in a vectype (nunits), we have to
2508 generate more than one vector stmt - i.e - we need to "unroll"
2509 the vector stmt by a factor VF/nunits. */
2510 for (j = 0; j < ncopies; j++)
2511 {
4a00c761 2512 /* Handle uses. */
ebfd146a 2513 if (j == 0)
4a00c761
JJ
2514 {
2515 if (slp_node)
2516 {
2517 if (code == WIDEN_LSHIFT_EXPR)
2518 {
2519 unsigned int k;
ebfd146a 2520
4a00c761
JJ
2521 vec_oprnd1 = op1;
2522 /* Store vec_oprnd1 for every vector stmt to be created
2523 for SLP_NODE. We check during the analysis that all
2524 the shift arguments are the same. */
2525 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2526 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2527
2528 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2529 slp_node, -1);
2530 }
2531 else
2532 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2533 &vec_oprnds1, slp_node, -1);
2534 }
2535 else
2536 {
2537 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2538 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2539 if (op_type == binary_op)
2540 {
2541 if (code == WIDEN_LSHIFT_EXPR)
2542 vec_oprnd1 = op1;
2543 else
2544 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2545 NULL);
2546 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2547 }
2548 }
2549 }
ebfd146a 2550 else
4a00c761
JJ
2551 {
2552 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2553 VEC_truncate (tree, vec_oprnds0, 0);
2554 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2555 if (op_type == binary_op)
2556 {
2557 if (code == WIDEN_LSHIFT_EXPR)
2558 vec_oprnd1 = op1;
2559 else
2560 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2561 vec_oprnd1);
2562 VEC_truncate (tree, vec_oprnds1, 0);
2563 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2564 }
2565 }
ebfd146a 2566
4a00c761
JJ
2567 /* Arguments are ready. Create the new vector stmts. */
2568 for (i = multi_step_cvt; i >= 0; i--)
2569 {
2570 tree this_dest = VEC_index (tree, vec_dsts, i);
2571 enum tree_code c1 = code1, c2 = code2;
2572 if (i == 0 && codecvt2 != ERROR_MARK)
2573 {
2574 c1 = codecvt1;
2575 c2 = codecvt2;
2576 }
2577 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2578 &vec_oprnds1,
2579 stmt, this_dest, gsi,
2580 c1, c2, decl1, decl2,
2581 op_type);
2582 }
2583
2584 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2585 {
2586 if (cvt_type)
2587 {
2588 if (codecvt1 == CALL_EXPR)
2589 {
2590 new_stmt = gimple_build_call (decl1, 1, vop0);
2591 new_temp = make_ssa_name (vec_dest, new_stmt);
2592 gimple_call_set_lhs (new_stmt, new_temp);
2593 }
2594 else
2595 {
2596 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2597 new_temp = make_ssa_name (vec_dest, NULL);
2598 new_stmt = gimple_build_assign_with_ops (codecvt1,
2599 new_temp,
2600 vop0, NULL);
2601 }
2602
2603 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2604 }
2605 else
2606 new_stmt = SSA_NAME_DEF_STMT (vop0);
2607
2608 if (slp_node)
2609 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2610 new_stmt);
2611 else
2612 {
2613 if (!prev_stmt_info)
2614 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2615 else
2616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2617 prev_stmt_info = vinfo_for_stmt (new_stmt);
2618 }
2619 }
ebfd146a 2620 }
4a00c761
JJ
2621
2622 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
2623 break;
2624
2625 case NARROW:
2626 /* In case the vectorization factor (VF) is bigger than the number
2627 of elements that we can fit in a vectype (nunits), we have to
2628 generate more than one vector stmt - i.e - we need to "unroll"
2629 the vector stmt by a factor VF/nunits. */
2630 for (j = 0; j < ncopies; j++)
2631 {
2632 /* Handle uses. */
4a00c761
JJ
2633 if (slp_node)
2634 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2635 slp_node, -1);
ebfd146a
IR
2636 else
2637 {
4a00c761
JJ
2638 VEC_truncate (tree, vec_oprnds0, 0);
2639 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2640 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
2641 }
2642
4a00c761
JJ
2643 /* Arguments are ready. Create the new vector stmts. */
2644 if (cvt_type)
2645 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2646 {
2647 if (codecvt1 == CALL_EXPR)
2648 {
2649 new_stmt = gimple_build_call (decl1, 1, vop0);
2650 new_temp = make_ssa_name (vec_dest, new_stmt);
2651 gimple_call_set_lhs (new_stmt, new_temp);
2652 }
2653 else
2654 {
2655 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656 new_temp = make_ssa_name (vec_dest, NULL);
2657 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2658 vop0, NULL);
2659 }
ebfd146a 2660
4a00c761
JJ
2661 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2662 VEC_replace (tree, vec_oprnds0, i, new_temp);
2663 }
ebfd146a 2664
4a00c761
JJ
2665 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2666 stmt, vec_dsts, gsi,
2667 slp_node, code1,
2668 &prev_stmt_info);
ebfd146a
IR
2669 }
2670
2671 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 2672 break;
ebfd146a
IR
2673 }
2674
4a00c761
JJ
2675 VEC_free (tree, heap, vec_oprnds0);
2676 VEC_free (tree, heap, vec_oprnds1);
2677 VEC_free (tree, heap, vec_dsts);
2678 VEC_free (tree, heap, interm_types);
ebfd146a
IR
2679
2680 return true;
2681}
ff802fa1
IR
2682
2683
ebfd146a
IR
2684/* Function vectorizable_assignment.
2685
b8698a0f
L
2686 Check if STMT performs an assignment (copy) that can be vectorized.
2687 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2688 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2689 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2690
2691static bool
2692vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2693 gimple *vec_stmt, slp_tree slp_node)
2694{
2695 tree vec_dest;
2696 tree scalar_dest;
2697 tree op;
2698 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2699 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2701 tree new_temp;
2702 tree def;
2703 gimple def_stmt;
2704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 2705 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 2706 int ncopies;
f18b55bd 2707 int i, j;
ebfd146a
IR
2708 VEC(tree,heap) *vec_oprnds = NULL;
2709 tree vop;
a70d6342 2710 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
2711 gimple new_stmt = NULL;
2712 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
2713 enum tree_code code;
2714 tree vectype_in;
ebfd146a
IR
2715
2716 /* Multiple types in SLP are handled by creating the appropriate number of
2717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2718 case of SLP. */
437f4a00 2719 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
2720 ncopies = 1;
2721 else
2722 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2723
2724 gcc_assert (ncopies >= 1);
ebfd146a 2725
a70d6342 2726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2727 return false;
2728
8644a673 2729 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2730 return false;
2731
2732 /* Is vectorizable assignment? */
2733 if (!is_gimple_assign (stmt))
2734 return false;
2735
2736 scalar_dest = gimple_assign_lhs (stmt);
2737 if (TREE_CODE (scalar_dest) != SSA_NAME)
2738 return false;
2739
fde9c428 2740 code = gimple_assign_rhs_code (stmt);
ebfd146a 2741 if (gimple_assign_single_p (stmt)
fde9c428
RG
2742 || code == PAREN_EXPR
2743 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
2744 op = gimple_assign_rhs1 (stmt);
2745 else
2746 return false;
2747
7b7ec6c5
RG
2748 if (code == VIEW_CONVERT_EXPR)
2749 op = TREE_OPERAND (op, 0);
2750
fde9c428
RG
2751 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2752 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a
IR
2753 {
2754 if (vect_print_dump_info (REPORT_DETAILS))
2755 fprintf (vect_dump, "use not simple.");
2756 return false;
2757 }
2758
fde9c428
RG
2759 /* We can handle NOP_EXPR conversions that do not change the number
2760 of elements or the vector size. */
7b7ec6c5
RG
2761 if ((CONVERT_EXPR_CODE_P (code)
2762 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
2763 && (!vectype_in
2764 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2765 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2766 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2767 return false;
2768
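 /* Illustration (editor's note, not in the original source): converting a
    4 x unsigned int vector to 4 x int keeps both the element count (4) and
    the vector size (16 bytes), so it is handled here as a simple copy; an
    int -> short conversion changes the element count and is rejected above,
    to be handled as a real conversion instead. */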
7b7b1813
RG
2769 /* We do not handle bit-precision changes. */
2770 if ((CONVERT_EXPR_CODE_P (code)
2771 || code == VIEW_CONVERT_EXPR)
2772 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2773 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2774 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2775 || ((TYPE_PRECISION (TREE_TYPE (op))
2776 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2777 /* But a conversion that does not change the bit-pattern is ok. */
2778 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2779 > TYPE_PRECISION (TREE_TYPE (op)))
2780 && TYPE_UNSIGNED (TREE_TYPE (op))))
2781 {
2782 if (vect_print_dump_info (REPORT_DETAILS))
2783 fprintf (vect_dump, "type conversion to/from bit-precision "
2784 "unsupported.");
2785 return false;
2786 }
2787
ebfd146a
IR
2788 if (!vec_stmt) /* transformation not required. */
2789 {
2790 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2791 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "=== vectorizable_assignment ===");
2793 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2794 return true;
2795 }
2796
2797 /** Transform. **/
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "transform assignment.");
2800
2801 /* Handle def. */
2802 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2803
2804 /* Handle use. */
f18b55bd 2805 for (j = 0; j < ncopies; j++)
ebfd146a 2806 {
f18b55bd
IR
2807 /* Handle uses. */
2808 if (j == 0)
d092494c 2809 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
f18b55bd
IR
2810 else
2811 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2812
2813 /* Arguments are ready. Create the new vector stmt. */
ac47786e 2814 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
f18b55bd 2815 {
7b7ec6c5
RG
2816 if (CONVERT_EXPR_CODE_P (code)
2817 || code == VIEW_CONVERT_EXPR)
4a73490d 2818 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
2819 new_stmt = gimple_build_assign (vec_dest, vop);
2820 new_temp = make_ssa_name (vec_dest, new_stmt);
2821 gimple_assign_set_lhs (new_stmt, new_temp);
2822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2823 if (slp_node)
2824 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2825 }
ebfd146a
IR
2826
2827 if (slp_node)
f18b55bd
IR
2828 continue;
2829
2830 if (j == 0)
2831 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2832 else
2833 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2834
2835 prev_stmt_info = vinfo_for_stmt (new_stmt);
2836 }
b8698a0f
L
2837
2838 VEC_free (tree, heap, vec_oprnds);
ebfd146a
IR
2839 return true;
2840}
2841
9dc3f7de 2842
1107f3ae
IR
2843/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2844 either as shift by a scalar or by a vector. */
2845
2846bool
2847vect_supportable_shift (enum tree_code code, tree scalar_type)
2848{
2849
2850 enum machine_mode vec_mode;
2851 optab optab;
2852 int icode;
2853 tree vectype;
2854
2855 vectype = get_vectype_for_scalar_type (scalar_type);
2856 if (!vectype)
2857 return false;
2858
2859 optab = optab_for_tree_code (code, vectype, optab_scalar);
2860 if (!optab
2861 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2862 {
2863 optab = optab_for_tree_code (code, vectype, optab_vector);
2864 if (!optab
2865 || (optab_handler (optab, TYPE_MODE (vectype))
2866 == CODE_FOR_nothing))
2867 return false;
2868 }
2869
2870 vec_mode = TYPE_MODE (vectype);
2871 icode = (int) optab_handler (optab, vec_mode);
2872 if (icode == CODE_FOR_nothing)
2873 return false;
2874
2875 return true;
2876}
2877
2878
9dc3f7de
IR
2879/* Function vectorizable_shift.
2880
2881 Check if STMT performs a shift operation that can be vectorized.
2882 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2883 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2884 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2885
2886static bool
2887vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2888 gimple *vec_stmt, slp_tree slp_node)
2889{
2890 tree vec_dest;
2891 tree scalar_dest;
2892 tree op0, op1 = NULL;
2893 tree vec_oprnd1 = NULL_TREE;
2894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2895 tree vectype;
2896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2897 enum tree_code code;
2898 enum machine_mode vec_mode;
2899 tree new_temp;
2900 optab optab;
2901 int icode;
2902 enum machine_mode optab_op2_mode;
2903 tree def;
2904 gimple def_stmt;
2905 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2906 gimple new_stmt = NULL;
2907 stmt_vec_info prev_stmt_info;
2908 int nunits_in;
2909 int nunits_out;
2910 tree vectype_out;
cede2577 2911 tree op1_vectype;
9dc3f7de
IR
2912 int ncopies;
2913 int j, i;
2914 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2915 tree vop0, vop1;
2916 unsigned int k;
49eab32e 2917 bool scalar_shift_arg = true;
9dc3f7de
IR
2918 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2919 int vf;
2920
2921 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2922 return false;
2923
2924 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2925 return false;
2926
2927 /* Is STMT a vectorizable binary/unary operation? */
2928 if (!is_gimple_assign (stmt))
2929 return false;
2930
2931 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2932 return false;
2933
2934 code = gimple_assign_rhs_code (stmt);
2935
2936 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2937 || code == RROTATE_EXPR))
2938 return false;
2939
2940 scalar_dest = gimple_assign_lhs (stmt);
2941 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
2942 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2943 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2944 {
2945 if (vect_print_dump_info (REPORT_DETAILS))
2946 fprintf (vect_dump, "bit-precision shifts not supported.");
2947 return false;
2948 }
9dc3f7de
IR
2949
2950 op0 = gimple_assign_rhs1 (stmt);
2951 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2952 &def_stmt, &def, &dt[0], &vectype))
2953 {
2954 if (vect_print_dump_info (REPORT_DETAILS))
2955 fprintf (vect_dump, "use not simple.");
2956 return false;
2957 }
2958 /* If op0 is an external or constant def use a vector type with
2959 the same size as the output vector type. */
2960 if (!vectype)
2961 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2962 if (vec_stmt)
2963 gcc_assert (vectype);
2964 if (!vectype)
2965 {
2966 if (vect_print_dump_info (REPORT_DETAILS))
2967 {
2968 fprintf (vect_dump, "no vectype for scalar type ");
2969 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2970 }
2971
2972 return false;
2973 }
2974
2975 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2976 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2977 if (nunits_out != nunits_in)
2978 return false;
2979
2980 op1 = gimple_assign_rhs2 (stmt);
cede2577
JJ
2981 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2982 &dt[1], &op1_vectype))
9dc3f7de
IR
2983 {
2984 if (vect_print_dump_info (REPORT_DETAILS))
2985 fprintf (vect_dump, "use not simple.");
2986 return false;
2987 }
2988
2989 if (loop_vinfo)
2990 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2991 else
2992 vf = 1;
2993
2994 /* Multiple types in SLP are handled by creating the appropriate number of
2995 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2996 case of SLP. */
437f4a00 2997 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
2998 ncopies = 1;
2999 else
3000 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3001
3002 gcc_assert (ncopies >= 1);
3003
3004 /* Determine whether the shift amount is a vector, or scalar. If the
3005 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3006
49eab32e
JJ
3007 if (dt[1] == vect_internal_def && !slp_node)
3008 scalar_shift_arg = false;
3009 else if (dt[1] == vect_constant_def
3010 || dt[1] == vect_external_def
3011 || dt[1] == vect_internal_def)
3012 {
3013 /* In SLP, we need to check whether the shift count is the same for
3014 all statements; in loops, a constant or invariant count is always
3015 a scalar shift. */
3016 if (slp_node)
3017 {
3018 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3019 gimple slpstmt;
3020
3021 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3022 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3023 scalar_shift_arg = false;
3024 }
3025 }
3026 else
3027 {
3028 if (vect_print_dump_info (REPORT_DETAILS))
3029 fprintf (vect_dump, "operand mode requires invariant argument.");
3030 return false;
3031 }
3032
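 /* Illustration (editor's note, not in the original source): in
    a[i] = b[i] << 3  the shift count is the same for every element, so a
    vector/scalar shift with the scalar count can be used; in
    a[i] = b[i] << c[i]  the count varies per element and only a
    vector/vector shift will do. */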
9dc3f7de 3033 /* Vector shifted by vector. */
49eab32e 3034 if (!scalar_shift_arg)
9dc3f7de
IR
3035 {
3036 optab = optab_for_tree_code (code, vectype, optab_vector);
3037 if (vect_print_dump_info (REPORT_DETAILS))
3038 fprintf (vect_dump, "vector/vector shift/rotate found.");
aa948027
JJ
3039 if (!op1_vectype)
3040 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3041 if (op1_vectype == NULL_TREE
3042 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577
JJ
3043 {
3044 if (vect_print_dump_info (REPORT_DETAILS))
3045 fprintf (vect_dump, "unusable type for last operand in"
3046 " vector/vector shift/rotate.");
3047 return false;
3048 }
9dc3f7de
IR
3049 }
3050 /* See if the machine has a vector shifted by scalar insn and if not
3051 then see if it has a vector shifted by vector insn. */
49eab32e 3052 else
9dc3f7de
IR
3053 {
3054 optab = optab_for_tree_code (code, vectype, optab_scalar);
3055 if (optab
3056 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3057 {
9dc3f7de
IR
3058 if (vect_print_dump_info (REPORT_DETAILS))
3059 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3060 }
3061 else
3062 {
3063 optab = optab_for_tree_code (code, vectype, optab_vector);
3064 if (optab
3065 && (optab_handler (optab, TYPE_MODE (vectype))
3066 != CODE_FOR_nothing))
3067 {
49eab32e
JJ
3068 scalar_shift_arg = false;
3069
9dc3f7de
IR
3070 if (vect_print_dump_info (REPORT_DETAILS))
3071 fprintf (vect_dump, "vector/vector shift/rotate found.");
3072
3073 /* Unlike the other binary operators, shifts/rotates have
3074 the rhs being int, instead of the same type as the lhs,
3075 so make sure the scalar is the right type if we are
aa948027 3076 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
3077 if (dt[1] == vect_constant_def)
3078 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
3079 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3080 TREE_TYPE (op1)))
3081 {
3082 if (slp_node
3083 && TYPE_MODE (TREE_TYPE (vectype))
3084 != TYPE_MODE (TREE_TYPE (op1)))
3085 {
3086 if (vect_print_dump_info (REPORT_DETAILS))
3087 fprintf (vect_dump, "unusable type for last operand in"
3088 " vector/vector shift/rotate.");
3089 return false;
3090 }
3091 if (vec_stmt && !slp_node)
3092 {
3093 op1 = fold_convert (TREE_TYPE (vectype), op1);
3094 op1 = vect_init_vector (stmt, op1,
3095 TREE_TYPE (vectype), NULL);
3096 }
3097 }
9dc3f7de
IR
3098 }
3099 }
3100 }
9dc3f7de
IR
3101
3102 /* Supportable by target? */
3103 if (!optab)
3104 {
3105 if (vect_print_dump_info (REPORT_DETAILS))
3106 fprintf (vect_dump, "no optab.");
3107 return false;
3108 }
3109 vec_mode = TYPE_MODE (vectype);
3110 icode = (int) optab_handler (optab, vec_mode);
3111 if (icode == CODE_FOR_nothing)
3112 {
3113 if (vect_print_dump_info (REPORT_DETAILS))
3114 fprintf (vect_dump, "op not supported by target.");
3115 /* Check only during analysis. */
3116 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3117 || (vf < vect_min_worthwhile_factor (code)
3118 && !vec_stmt))
3119 return false;
3120 if (vect_print_dump_info (REPORT_DETAILS))
3121 fprintf (vect_dump, "proceeding using word mode.");
3122 }
3123
3124 /* Worthwhile without SIMD support? Check only during analysis. */
3125 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3126 && vf < vect_min_worthwhile_factor (code)
3127 && !vec_stmt)
3128 {
3129 if (vect_print_dump_info (REPORT_DETAILS))
3130 fprintf (vect_dump, "not worthwhile without SIMD support.");
3131 return false;
3132 }
3133
3134 if (!vec_stmt) /* transformation not required. */
3135 {
3136 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3137 if (vect_print_dump_info (REPORT_DETAILS))
3138 fprintf (vect_dump, "=== vectorizable_shift ===");
3139 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3140 return true;
3141 }
3142
3143 /** Transform. **/
3144
3145 if (vect_print_dump_info (REPORT_DETAILS))
3146 fprintf (vect_dump, "transform binary/unary operation.");
3147
3148 /* Handle def. */
3149 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3150
3151 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3152 created in the previous stages of the recursion, so no allocation is
3153 needed, except for the case of shift with scalar shift argument. In that
3154 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3155 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3156 In case of loop-based vectorization we allocate VECs of size 1. We
3157 allocate VEC_OPRNDS1 only in case of binary operation. */
3158 if (!slp_node)
3159 {
3160 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3161 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3162 }
3163 else if (scalar_shift_arg)
3164 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3165
3166 prev_stmt_info = NULL;
3167 for (j = 0; j < ncopies; j++)
3168 {
3169 /* Handle uses. */
3170 if (j == 0)
3171 {
3172 if (scalar_shift_arg)
3173 {
3174 /* Vector shl and shr insn patterns can be defined with scalar
3175 operand 2 (shift operand). In this case, use constant or loop
3176 invariant op1 directly, without extending it to vector mode
3177 first. */
3178 optab_op2_mode = insn_data[icode].operand[2].mode;
3179 if (!VECTOR_MODE_P (optab_op2_mode))
3180 {
3181 if (vect_print_dump_info (REPORT_DETAILS))
3182 fprintf (vect_dump, "operand 1 using scalar mode.");
3183 vec_oprnd1 = op1;
3184 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3185 if (slp_node)
3186 {
3187 /* Store vec_oprnd1 for every vector stmt to be created
3188 for SLP_NODE. We check during the analysis that all
3189 the shift arguments are the same.
3190 TODO: Allow different constants for different vector
3191 stmts generated for an SLP instance. */
3192 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3193 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3194 }
3195 }
3196 }
3197
3198 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3199 (a special case for certain kinds of vector shifts); otherwise,
3200 operand 1 should be of a vector type (the usual case). */
3201 if (vec_oprnd1)
3202 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 3203 slp_node, -1);
9dc3f7de
IR
3204 else
3205 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 3206 slp_node, -1);
9dc3f7de
IR
3207 }
3208 else
3209 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3210
3211 /* Arguments are ready. Create the new vector stmt. */
3212 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3213 {
3214 vop1 = VEC_index (tree, vec_oprnds1, i);
3215 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3216 new_temp = make_ssa_name (vec_dest, new_stmt);
3217 gimple_assign_set_lhs (new_stmt, new_temp);
3218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3219 if (slp_node)
3220 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3221 }
3222
3223 if (slp_node)
3224 continue;
3225
3226 if (j == 0)
3227 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3228 else
3229 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3230 prev_stmt_info = vinfo_for_stmt (new_stmt);
3231 }
3232
3233 VEC_free (tree, heap, vec_oprnds0);
3234 VEC_free (tree, heap, vec_oprnds1);
3235
3236 return true;
3237}
3238
3239
ebfd146a
IR
3240/* Function vectorizable_operation.
3241
16949072
RG
3242 Check if STMT performs a binary, unary or ternary operation that can
3243 be vectorized.
b8698a0f 3244 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3245 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3246 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3247
3248static bool
3249vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3250 gimple *vec_stmt, slp_tree slp_node)
3251{
3252 tree vec_dest;
3253 tree scalar_dest;
16949072 3254 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 3255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b690cc0f 3256 tree vectype;
ebfd146a
IR
3257 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3258 enum tree_code code;
3259 enum machine_mode vec_mode;
3260 tree new_temp;
3261 int op_type;
3262 optab optab;
3263 int icode;
ebfd146a
IR
3264 tree def;
3265 gimple def_stmt;
16949072
RG
3266 enum vect_def_type dt[3]
3267 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
3268 gimple new_stmt = NULL;
3269 stmt_vec_info prev_stmt_info;
b690cc0f 3270 int nunits_in;
ebfd146a
IR
3271 int nunits_out;
3272 tree vectype_out;
3273 int ncopies;
3274 int j, i;
16949072
RG
3275 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3276 tree vop0, vop1, vop2;
a70d6342
IR
3277 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3278 int vf;
3279
a70d6342 3280 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3281 return false;
3282
8644a673 3283 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3284 return false;
3285
3286 /* Is STMT a vectorizable binary/unary operation? */
3287 if (!is_gimple_assign (stmt))
3288 return false;
3289
3290 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3291 return false;
3292
ebfd146a
IR
3293 code = gimple_assign_rhs_code (stmt);
3294
3295 /* For pointer addition, we should use the normal plus for
3296 the vector addition. */
3297 if (code == POINTER_PLUS_EXPR)
3298 code = PLUS_EXPR;
3299
3300 /* Support only unary or binary operations. */
3301 op_type = TREE_CODE_LENGTH (code);
16949072 3302 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a
IR
3303 {
3304 if (vect_print_dump_info (REPORT_DETAILS))
16949072
RG
3305 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3306 op_type);
ebfd146a
IR
3307 return false;
3308 }
3309
b690cc0f
RG
3310 scalar_dest = gimple_assign_lhs (stmt);
3311 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3312
7b7b1813
RG
3313 /* Most operations cannot handle bit-precision types without extra
3314 truncations. */
3315 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3316 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3317 /* Exceptions are bitwise binary operations. */
3318 && code != BIT_IOR_EXPR
3319 && code != BIT_XOR_EXPR
3320 && code != BIT_AND_EXPR)
3321 {
3322 if (vect_print_dump_info (REPORT_DETAILS))
3323 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3324 return false;
3325 }
3326
ebfd146a 3327 op0 = gimple_assign_rhs1 (stmt);
b690cc0f
RG
3328 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3329 &def_stmt, &def, &dt[0], &vectype))
ebfd146a
IR
3330 {
3331 if (vect_print_dump_info (REPORT_DETAILS))
3332 fprintf (vect_dump, "use not simple.");
3333 return false;
3334 }
b690cc0f
RG
3335 /* If op0 is an external or constant def use a vector type with
3336 the same size as the output vector type. */
3337 if (!vectype)
3338 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3339 if (vec_stmt)
3340 gcc_assert (vectype);
3341 if (!vectype)
3342 {
3343 if (vect_print_dump_info (REPORT_DETAILS))
3344 {
3345 fprintf (vect_dump, "no vectype for scalar type ");
3346 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3347 }
3348
3349 return false;
3350 }
b690cc0f
RG
3351
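/* This routine only handles operations whose result vector has the same
   number of elements as the operand vectors; widening and narrowing
   operations are rejected here and handled by their own vectorizable_*
   routines.  */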
3352 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3353 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3354 if (nunits_out != nunits_in)
3355 return false;
ebfd146a 3356
16949072 3357 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
3358 {
3359 op1 = gimple_assign_rhs2 (stmt);
b8698a0f 3360 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 3361 &dt[1]))
ebfd146a
IR
3362 {
3363 if (vect_print_dump_info (REPORT_DETAILS))
3364 fprintf (vect_dump, "use not simple.");
3365 return false;
3366 }
3367 }
16949072
RG
3368 if (op_type == ternary_op)
3369 {
3370 op2 = gimple_assign_rhs3 (stmt);
3371 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
3372 &dt[2]))
3373 {
3374 if (vect_print_dump_info (REPORT_DETAILS))
3375 fprintf (vect_dump, "use not simple.");
3376 return false;
3377 }
3378 }
ebfd146a 3379
b690cc0f
RG
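/* For basic-block SLP there is no containing loop and hence no
   vectorization factor; use 1 for the profitability checks below.  */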
3380 if (loop_vinfo)
3381 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3382 else
3383 vf = 1;
3384
3385 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3386 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 3387 case of SLP. */
437f4a00 3388 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
3389 ncopies = 1;
3390 else
3391 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3392
3393 gcc_assert (ncopies >= 1);
3394
9dc3f7de 3395 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
3396 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3397 || code == RROTATE_EXPR)
9dc3f7de 3398 return false;
ebfd146a 3399
16949072 3400 optab = optab_for_tree_code (code, vectype, optab_default);
ebfd146a
IR
3401
3402 /* Supportable by target? */
3403 if (!optab)
3404 {
3405 if (vect_print_dump_info (REPORT_DETAILS))
3406 fprintf (vect_dump, "no optab.");
3407 return false;
3408 }
3409 vec_mode = TYPE_MODE (vectype);
947131ba 3410 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
3411 if (icode == CODE_FOR_nothing)
3412 {
3413 if (vect_print_dump_info (REPORT_DETAILS))
3414 fprintf (vect_dump, "op not supported by target.");
3415 /* Check only during analysis. */
3416 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
a70d6342 3417 || (vf < vect_min_worthwhile_factor (code)
ebfd146a
IR
3418 && !vec_stmt))
3419 return false;
3420 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761 3421 fprintf (vect_dump, "proceeding using word mode.");
383d9c83
IR
3422 }
3423
4a00c761
JJ
3424 /* Worthwhile without SIMD support? Check only during analysis. */
3425 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3426 && vf < vect_min_worthwhile_factor (code)
3427 && !vec_stmt)
7d8930a0
IR
3428 {
3429 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761 3430 fprintf (vect_dump, "not worthwhile without SIMD support.");
7d8930a0
IR
3431 return false;
3432 }
ebfd146a 3433
ebfd146a
IR
3434 if (!vec_stmt) /* transformation not required. */
3435 {
4a00c761 3436 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
ebfd146a 3437 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761
JJ
3438 fprintf (vect_dump, "=== vectorizable_operation ===");
3439 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
ebfd146a
IR
3440 return true;
3441 }
3442
3443 /** Transform. **/
3444
3445 if (vect_print_dump_info (REPORT_DETAILS))
4a00c761 3446 fprintf (vect_dump, "transform unary/binary/ternary operation.");
383d9c83 3447
ebfd146a 3448 /* Handle def. */
4a00c761 3449 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 3450
4a00c761
JJ
3451 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3452 created in the previous stages of the recursion, so no allocation is
3453 needed (shifts, which need special handling of a scalar shift operand,
3454 are rejected above and vectorized in vectorizable_shift). In case of
3455 loop-based vectorization we allocate VECs of size 1. VEC_OPRNDS1 is
3456 allocated only for binary and ternary operations, and VEC_OPRNDS2 only
3457 for ternary operations. */
ebfd146a
IR
3458 if (!slp_node)
3459 {
4a00c761
JJ
3460 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3461 if (op_type == binary_op || op_type == ternary_op)
ebfd146a 3462 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4a00c761
JJ
3463 if (op_type == ternary_op)
3464 vec_oprnds2 = VEC_alloc (tree, heap, 1);
ebfd146a
IR
3465 }
3466
3467 /* In case the vectorization factor (VF) is bigger than the number
3468 of elements that we can fit in a vectype (nunits), we have to generate
3469 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
3470 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3471 from one copy of the vector stmt to the next, in the field
3472 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3473 stages to find the correct vector defs to be used when vectorizing
3474 stmts that use the defs of the current stmt. The example below
3475 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3476 we need to create 4 vectorized stmts):
3477
3478 before vectorization:
3479 RELATED_STMT VEC_STMT
3480 S1: x = memref - -
3481 S2: z = x + 1 - -
3482
3483 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3484 there):
3485 RELATED_STMT VEC_STMT
3486 VS1_0: vx0 = memref0 VS1_1 -
3487 VS1_1: vx1 = memref1 VS1_2 -
3488 VS1_2: vx2 = memref2 VS1_3 -
3489 VS1_3: vx3 = memref3 - -
3490 S1: x = load - VS1_0
3491 S2: z = x + 1 - -
3492
3493 step2: vectorize stmt S2 (done here):
3494 To vectorize stmt S2 we first need to find the relevant vector
3495 def for the first operand 'x'. This is, as usual, obtained from
3496 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3497 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3498 relevant vector def 'vx0'. Having found 'vx0' we can generate
3499 the vector stmt VS2_0, and as usual, record it in the
3500 STMT_VINFO_VEC_STMT of stmt S2.
3501 When creating the second copy (VS2_1), we obtain the relevant vector
3502 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3503 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3504 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3505 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3506 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3507 chain of stmts and pointers:
3508 RELATED_STMT VEC_STMT
3509 VS1_0: vx0 = memref0 VS1_1 -
3510 VS1_1: vx1 = memref1 VS1_2 -
3511 VS1_2: vx2 = memref2 VS1_3 -
3512 VS1_3: vx3 = memref3 - -
3513 S1: x = load - VS1_0
3514 VS2_0: vz0 = vx0 + v1 VS2_1 -
3515 VS2_1: vz1 = vx1 + v1 VS2_2 -
3516 VS2_2: vz2 = vx2 + v1 VS2_3 -
3517 VS2_3: vz3 = vx3 + v1 - -
3518 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
3519
3520 prev_stmt_info = NULL;
3521 for (j = 0; j < ncopies; j++)
3522 {
3523 /* Handle uses. */
3524 if (j == 0)
4a00c761
JJ
3525 {
3526 if (op_type == binary_op || op_type == ternary_op)
3527 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3528 slp_node, -1);
3529 else
3530 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3531 slp_node, -1);
3532 if (op_type == ternary_op)
36ba4aae 3533 {
4a00c761
JJ
3534 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3535 VEC_quick_push (tree, vec_oprnds2,
3536 vect_get_vec_def_for_operand (op2, stmt, NULL));
36ba4aae 3537 }
4a00c761 3538 }
ebfd146a 3539 else
4a00c761
JJ
3540 {
3541 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
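/* VEC_OPRNDS2 still holds the def used by the previous copy for the
   third operand: pop it and push the def for this copy in its place.  */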
3542 if (op_type == ternary_op)
3543 {
3544 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3545 VEC_quick_push (tree, vec_oprnds2,
3546 vect_get_vec_def_for_stmt_copy (dt[2],
3547 vec_oprnd));
3548 }
3549 }
3550
3551 /* Arguments are ready. Create the new vector stmt. */
3552 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
ebfd146a 3553 {
4a00c761
JJ
3554 vop1 = ((op_type == binary_op || op_type == ternary_op)
3555 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3556 vop2 = ((op_type == ternary_op)
3557 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3558 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3559 vop0, vop1, vop2);
3560 new_temp = make_ssa_name (vec_dest, new_stmt);
3561 gimple_assign_set_lhs (new_stmt, new_temp);
3562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3563 if (slp_node)
3564 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
ebfd146a
IR
3565 }
3566
4a00c761
JJ
3567 if (slp_node)
3568 continue;
3569
3570 if (j == 0)
3571 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3572 else
3573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3574 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
3575 }
3576
ebfd146a 3577 VEC_free (tree, heap, vec_oprnds0);
4a00c761
JJ
3578 if (vec_oprnds1)
3579 VEC_free (tree, heap, vec_oprnds1);
3580 if (vec_oprnds2)
3581 VEC_free (tree, heap, vec_oprnds2);
ebfd146a 3582
ebfd146a
IR
3583 return true;
3584}
3585
3586
3587/* Function vectorizable_store.
3588
b8698a0f
L
3589 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3590 can be vectorized.
3591 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3592 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3593 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3594
3595static bool
3596vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3597 slp_tree slp_node)
3598{
3599 tree scalar_dest;
3600 tree data_ref;
3601 tree op;
3602 tree vec_oprnd = NULL_TREE;
3603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3604 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3605 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3606 tree elem_type;
ebfd146a 3607 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3608 struct loop *loop = NULL;
ebfd146a
IR
3609 enum machine_mode vec_mode;
3610 tree dummy;
3611 enum dr_alignment_support alignment_support_scheme;
3612 tree def;
3613 gimple def_stmt;
3614 enum vect_def_type dt;
3615 stmt_vec_info prev_stmt_info = NULL;
3616 tree dataref_ptr = NULL_TREE;
3617 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3618 int ncopies;
3619 int j;
3620 gimple next_stmt, first_stmt = NULL;
3621 bool strided_store = false;
272c6793 3622 bool store_lanes_p = false;
ebfd146a
IR
3623 unsigned int group_size, i;
3624 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3625 bool inv_p;
3626 VEC(tree,heap) *vec_oprnds = NULL;
3627 bool slp = (slp_node != NULL);
ebfd146a 3628 unsigned int vec_num;
a70d6342 3629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3630 tree aggr_type;
a70d6342
IR
3631
3632 if (loop_vinfo)
3633 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
3634
3635 /* Multiple types in SLP are handled by creating the appropriate number of
3636 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3637 case of SLP. */
437f4a00 3638 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
3639 ncopies = 1;
3640 else
3641 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3642
3643 gcc_assert (ncopies >= 1);
3644
3645 /* FORNOW. This restriction should be relaxed. */
a70d6342 3646 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a
IR
3647 {
3648 if (vect_print_dump_info (REPORT_DETAILS))
3649 fprintf (vect_dump, "multiple types in nested loop.");
3650 return false;
3651 }
3652
a70d6342 3653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3654 return false;
3655
8644a673 3656 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3657 return false;
3658
3659 /* Is vectorizable store? */
3660
3661 if (!is_gimple_assign (stmt))
3662 return false;
3663
3664 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
3665 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3666 && is_pattern_stmt_p (stmt_info))
3667 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a
IR
3668 if (TREE_CODE (scalar_dest) != ARRAY_REF
3669 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
3670 && TREE_CODE (scalar_dest) != COMPONENT_REF
3671 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
3672 && TREE_CODE (scalar_dest) != REALPART_EXPR
3673 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
3674 return false;
3675
3676 gcc_assert (gimple_assign_single_p (stmt));
3677 op = gimple_assign_rhs1 (stmt);
a70d6342 3678 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
ebfd146a
IR
3679 {
3680 if (vect_print_dump_info (REPORT_DETAILS))
3681 fprintf (vect_dump, "use not simple.");
3682 return false;
3683 }
3684
272c6793 3685 elem_type = TREE_TYPE (vectype);
ebfd146a 3686 vec_mode = TYPE_MODE (vectype);
7b7b1813 3687
ebfd146a
IR
3688 /* FORNOW. In some cases can vectorize even if data-type not supported
3689 (e.g. - array initialization with 0). */
947131ba 3690 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
3691 return false;
3692
3693 if (!STMT_VINFO_DATA_REF (stmt_info))
3694 return false;
3695
a1e53f3f
L
3696 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3697 {
3698 if (vect_print_dump_info (REPORT_DETAILS))
3699 fprintf (vect_dump, "negative step for store.");
3700 return false;
3701 }
3702
ebfd146a
IR
3703 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3704 {
3705 strided_store = true;
e14c1050 3706 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
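/* Prefer the target's store-lanes instructions for the whole group when
   they are available; otherwise fall back to interleaving the stores with
   permute statements, and give up if neither scheme supports this group
   size.  */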
3707 if (!slp && !PURE_SLP_STMT (stmt_info))
3708 {
e14c1050 3709 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
3710 if (vect_store_lanes_supported (vectype, group_size))
3711 store_lanes_p = true;
3712 else if (!vect_strided_store_supported (vectype, group_size))
b602d918
RS
3713 return false;
3714 }
b8698a0f 3715
ebfd146a
IR
3716 if (first_stmt == stmt)
3717 {
3718 /* STMT is the leader of the group. Check the operands of all the
3719 stmts of the group. */
e14c1050 3720 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
3721 while (next_stmt)
3722 {
3723 gcc_assert (gimple_assign_single_p (next_stmt));
3724 op = gimple_assign_rhs1 (next_stmt);
b8698a0f 3725 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
a70d6342 3726 &def, &dt))
ebfd146a
IR
3727 {
3728 if (vect_print_dump_info (REPORT_DETAILS))
3729 fprintf (vect_dump, "use not simple.");
3730 return false;
3731 }
e14c1050 3732 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
3733 }
3734 }
3735 }
3736
3737 if (!vec_stmt) /* transformation not required. */
3738 {
3739 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
272c6793 3740 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
ebfd146a
IR
3741 return true;
3742 }
3743
3744 /** Transform. **/
3745
3746 if (strided_store)
3747 {
3748 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 3749 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 3750
e14c1050 3751 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
3752
3753 /* FORNOW */
a70d6342 3754 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
3755
3756 /* We vectorize all the stmts of the interleaving group when we
3757 reach the last stmt in the group. */
e14c1050
IR
3758 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3759 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
3760 && !slp)
3761 {
3762 *vec_stmt = NULL;
3763 return true;
3764 }
3765
3766 if (slp)
4b5caab7
IR
3767 {
3768 strided_store = false;
3769 /* VEC_NUM is the number of vect stmts to be created for this
3770 group. */
3771 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3772 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3773 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 3774 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 3775 }
ebfd146a 3776 else
4b5caab7
IR
3777 /* VEC_NUM is the number of vect stmts to be created for this
3778 group. */
ebfd146a
IR
3779 vec_num = group_size;
3780 }
b8698a0f 3781 else
ebfd146a
IR
3782 {
3783 first_stmt = stmt;
3784 first_dr = dr;
3785 group_size = vec_num = 1;
ebfd146a 3786 }
b8698a0f 3787
ebfd146a
IR
3788 if (vect_print_dump_info (REPORT_DETAILS))
3789 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3790
3791 dr_chain = VEC_alloc (tree, heap, group_size);
3792 oprnds = VEC_alloc (tree, heap, group_size);
3793
720f5239 3794 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 3795 gcc_assert (alignment_support_scheme);
272c6793
RS
3796 /* Targets with store-lane instructions must not require explicit
3797 realignment. */
3798 gcc_assert (!store_lanes_p
3799 || alignment_support_scheme == dr_aligned
3800 || alignment_support_scheme == dr_unaligned_supported);
3801
3802 if (store_lanes_p)
3803 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3804 else
3805 aggr_type = vectype;
ebfd146a
IR
3806
3807 /* In case the vectorization factor (VF) is bigger than the number
3808 of elements that we can fit in a vectype (nunits), we have to generate
3809 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 3810 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
3811 vect_get_vec_def_for_stmt_copy. */
3812
3813 /* In case of interleaving (non-unit strided access):
3814
3815 S1: &base + 2 = x2
3816 S2: &base = x0
3817 S3: &base + 1 = x1
3818 S4: &base + 3 = x3
3819
3820 We create vectorized stores starting from the base address (the access of
3821 the first stmt in the chain, S2 in the above example) when the last store
3822 stmt of the chain (S4) is reached:
3823
3824 VS1: &base = vx2
3825 VS2: &base + vec_size*1 = vx0
3826 VS3: &base + vec_size*2 = vx1
3827 VS4: &base + vec_size*3 = vx3
3828
3829 Then permutation statements are generated:
3830
b826bea7
RH
3831 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3832 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
ebfd146a 3833 ...
b8698a0f 3834
ebfd146a
IR
3835 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3836 (the order of the data-refs in the output of vect_permute_store_chain
3837 corresponds to the order of scalar stmts in the interleaving chain - see
3838 the documentation of vect_permute_store_chain()).
3839
3840 In case of both multiple types and interleaving, above vector stores and
ff802fa1 3841 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 3842 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 3843 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
3844 */
3845
3846 prev_stmt_info = NULL;
3847 for (j = 0; j < ncopies; j++)
3848 {
3849 gimple new_stmt;
3850 gimple ptr_incr;
3851
3852 if (j == 0)
3853 {
3854 if (slp)
3855 {
3856 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
3857 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3858 NULL, slp_node, -1);
ebfd146a
IR
3859
3860 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3861 }
3862 else
3863 {
b8698a0f
L
3864 /* For interleaved stores we collect vectorized defs for all the
3865 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3866 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
3867 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3868
3869 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3870 OPRNDS are of size 1. */
b8698a0f 3871 next_stmt = first_stmt;
ebfd146a
IR
3872 for (i = 0; i < group_size; i++)
3873 {
b8698a0f
L
3874 /* Since gaps are not supported for interleaved stores,
3875 GROUP_SIZE is the exact number of stmts in the chain.
3876 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3877 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
3878 iteration of the loop will be executed. */
3879 gcc_assert (next_stmt
3880 && gimple_assign_single_p (next_stmt));
3881 op = gimple_assign_rhs1 (next_stmt);
3882
b8698a0f 3883 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 3884 NULL);
b8698a0f
L
3885 VEC_quick_push (tree, dr_chain, vec_oprnd);
3886 VEC_quick_push (tree, oprnds, vec_oprnd);
e14c1050 3887 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
3888 }
3889 }
3890
3891 /* We should have caught mismatched types earlier. */
3892 gcc_assert (useless_type_conversion_p (vectype,
3893 TREE_TYPE (vec_oprnd)));
272c6793 3894 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
920e8172
RS
3895 NULL_TREE, &dummy, gsi,
3896 &ptr_incr, false, &inv_p);
a70d6342 3897 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 3898 }
b8698a0f 3899 else
ebfd146a 3900 {
b8698a0f
L
3901 /* For interleaved stores we created vectorized defs for all the
3902 defs stored in OPRNDS in the previous iteration (previous copy).
3903 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
3904 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3905 next copy.
3906 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3907 OPRNDS are of size 1. */
3908 for (i = 0; i < group_size; i++)
3909 {
3910 op = VEC_index (tree, oprnds, i);
b8698a0f 3911 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 3912 &dt);
b8698a0f 3913 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
ebfd146a
IR
3914 VEC_replace (tree, dr_chain, i, vec_oprnd);
3915 VEC_replace (tree, oprnds, i, vec_oprnd);
3916 }
272c6793
RS
3917 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3918 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
3919 }
3920
272c6793 3921 if (store_lanes_p)
ebfd146a 3922 {
272c6793 3923 tree vec_array;
267d3070 3924
272c6793
RS
3925 /* Combine all the vectors into an array. */
3926 vec_array = create_vector_array (vectype, vec_num);
3927 for (i = 0; i < vec_num; i++)
c2d7ab2a 3928 {
272c6793
RS
3929 vec_oprnd = VEC_index (tree, dr_chain, i);
3930 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 3931 }
b8698a0f 3932
272c6793
RS
3933 /* Emit:
3934 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3935 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3936 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3937 gimple_call_set_lhs (new_stmt, data_ref);
267d3070
RS
3938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3939 mark_symbols_for_renaming (new_stmt);
272c6793
RS
3940 }
3941 else
3942 {
3943 new_stmt = NULL;
3944 if (strided_store)
3945 {
3946 result_chain = VEC_alloc (tree, heap, group_size);
3947 /* Permute. */
3948 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3949 &result_chain);
3950 }
c2d7ab2a 3951
272c6793
RS
3952 next_stmt = first_stmt;
3953 for (i = 0; i < vec_num; i++)
3954 {
3955 struct ptr_info_def *pi;
3956
3957 if (i > 0)
3958 /* Bump the vector pointer. */
3959 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3960 stmt, NULL_TREE);
3961
3962 if (slp)
3963 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3964 else if (strided_store)
3965 /* For strided stores vectorized defs are interleaved in
3966 vect_permute_store_chain(). */
3967 vec_oprnd = VEC_index (tree, result_chain, i);
3968
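/* Build the MEM_REF for this vector store and record what is known about
   the pointer's alignment: full vector alignment for a known-aligned
   access, element alignment when the misalignment is unknown, and the
   exact misalignment (relative to the vector alignment) otherwise.  */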
3969 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3970 build_int_cst (reference_alias_ptr_type
3971 (DR_REF (first_dr)), 0));
3972 pi = get_ptr_info (dataref_ptr);
3973 pi->align = TYPE_ALIGN_UNIT (vectype);
3974 if (aligned_access_p (first_dr))
3975 pi->misalign = 0;
3976 else if (DR_MISALIGNMENT (first_dr) == -1)
3977 {
3978 TREE_TYPE (data_ref)
3979 = build_aligned_type (TREE_TYPE (data_ref),
3980 TYPE_ALIGN (elem_type));
3981 pi->align = TYPE_ALIGN_UNIT (elem_type);
3982 pi->misalign = 0;
3983 }
3984 else
3985 {
3986 TREE_TYPE (data_ref)
3987 = build_aligned_type (TREE_TYPE (data_ref),
3988 TYPE_ALIGN (elem_type));
3989 pi->misalign = DR_MISALIGNMENT (first_dr);
3990 }
c2d7ab2a 3991
272c6793
RS
3992 /* Arguments are ready. Create the new vector stmt. */
3993 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3995 mark_symbols_for_renaming (new_stmt);
3996
3997 if (slp)
3998 continue;
3999
e14c1050 4000 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
4001 if (!next_stmt)
4002 break;
4003 }
ebfd146a 4004 }
1da0876c
RS
4005 if (!slp)
4006 {
4007 if (j == 0)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4009 else
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4012 }
ebfd146a
IR
4013 }
4014
b8698a0f
L
4015 VEC_free (tree, heap, dr_chain);
4016 VEC_free (tree, heap, oprnds);
ebfd146a 4017 if (result_chain)
b8698a0f 4018 VEC_free (tree, heap, result_chain);
ff802fa1
IR
4019 if (vec_oprnds)
4020 VEC_free (tree, heap, vec_oprnds);
ebfd146a
IR
4021
4022 return true;
4023}
4024
aec7ae7d
JJ
4025/* Given a vector type VECTYPE and permutation SEL returns
4026 the VECTOR_CST mask that implements the permutation of the
4027 vector elements. If that is impossible to do, returns NULL. */
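/* For example, for a four-element vector, SEL = {3, 2, 1, 0} yields the
   mask that reverses the elements; perm_mask_for_reverse below builds
   exactly that selector.  */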
a1e53f3f 4028
b826bea7
RH
4029static tree
4030gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 4031{
22e4dee7 4032 tree mask_elt_type, mask_type, mask_vec;
2635892a 4033 int i, nunits;
a1e53f3f 4034
22e4dee7 4035 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7
RH
4036
4037 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
a1e53f3f
L
4038 return NULL;
4039
22e4dee7 4040 mask_elt_type
2635892a
RH
4041 = lang_hooks.types.type_for_size
4042 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
22e4dee7 4043 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 4044
22e4dee7 4045 mask_vec = NULL;
aec7ae7d
JJ
4046 for (i = nunits - 1; i >= 0; i--)
4047 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4048 mask_vec);
a1e53f3f
L
4049 mask_vec = build_vector (mask_type, mask_vec);
4050
2635892a 4051 return mask_vec;
a1e53f3f
L
4052}
4053
aec7ae7d
JJ
4054/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4055 reversal of the vector elements. If that is impossible to do,
4056 returns NULL. */
4057
4058static tree
4059perm_mask_for_reverse (tree vectype)
4060{
4061 int i, nunits;
4062 unsigned char *sel;
4063
4064 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4065 sel = XALLOCAVEC (unsigned char, nunits);
4066
4067 for (i = 0; i < nunits; ++i)
4068 sel[i] = nunits - 1 - i;
4069
b826bea7 4070 return gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4071}
4072
4073/* Given a vector variable X and Y, that was generated for the scalar
4074 STMT, generate instructions to permute the vector elements of X and Y
4075 using permutation mask MASK_VEC, insert them at *GSI and return the
4076 permuted vector variable. */
a1e53f3f
L
4077
4078static tree
aec7ae7d
JJ
4079permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4080 gimple_stmt_iterator *gsi)
a1e53f3f
L
4081{
4082 tree vectype = TREE_TYPE (x);
aec7ae7d 4083 tree perm_dest, data_ref;
a1e53f3f
L
4084 gimple perm_stmt;
4085
a1e53f3f 4086 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
aec7ae7d 4087 data_ref = make_ssa_name (perm_dest, NULL);
a1e53f3f
L
4088
4089 /* Generate the permute statement. */
aec7ae7d
JJ
4090 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4091 x, y, mask_vec);
a1e53f3f
L
4092 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4093
4094 return data_ref;
4095}
4096
ebfd146a
IR
4097/* vectorizable_load.
4098
b8698a0f
L
4099 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4100 can be vectorized.
4101 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4102 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4103 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4104
4105static bool
4106vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4107 slp_tree slp_node, slp_instance slp_node_instance)
4108{
4109 tree scalar_dest;
4110 tree vec_dest = NULL;
4111 tree data_ref = NULL;
4112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4113 stmt_vec_info prev_stmt_info;
ebfd146a 4114 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4115 struct loop *loop = NULL;
ebfd146a 4116 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4117 bool nested_in_vect_loop = false;
ebfd146a
IR
4118 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4119 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4120 tree elem_type;
ebfd146a 4121 tree new_temp;
947131ba 4122 enum machine_mode mode;
ebfd146a
IR
4123 gimple new_stmt = NULL;
4124 tree dummy;
4125 enum dr_alignment_support alignment_support_scheme;
4126 tree dataref_ptr = NULL_TREE;
4127 gimple ptr_incr;
4128 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4129 int ncopies;
4130 int i, j, group_size;
4131 tree msq = NULL_TREE, lsq;
4132 tree offset = NULL_TREE;
4133 tree realignment_token = NULL_TREE;
4134 gimple phi = NULL;
4135 VEC(tree,heap) *dr_chain = NULL;
4136 bool strided_load = false;
272c6793 4137 bool load_lanes_p = false;
ebfd146a 4138 gimple first_stmt;
ebfd146a 4139 bool inv_p;
a1e53f3f 4140 bool negative;
ebfd146a
IR
4141 bool compute_in_loop = false;
4142 struct loop *at_loop;
4143 int vec_num;
4144 bool slp = (slp_node != NULL);
4145 bool slp_perm = false;
4146 enum tree_code code;
a70d6342
IR
4147 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4148 int vf;
272c6793 4149 tree aggr_type;
aec7ae7d
JJ
4150 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4151 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4152 int gather_scale = 1;
4153 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
4154
4155 if (loop_vinfo)
4156 {
4157 loop = LOOP_VINFO_LOOP (loop_vinfo);
4158 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4159 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4160 }
4161 else
3533e503 4162 vf = 1;
ebfd146a
IR
4163
4164 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4165 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4166 case of SLP. */
437f4a00 4167 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4168 ncopies = 1;
4169 else
4170 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4171
4172 gcc_assert (ncopies >= 1);
4173
4174 /* FORNOW. This restriction should be relaxed. */
4175 if (nested_in_vect_loop && ncopies > 1)
4176 {
4177 if (vect_print_dump_info (REPORT_DETAILS))
4178 fprintf (vect_dump, "multiple types in nested loop.");
4179 return false;
4180 }
4181
a70d6342 4182 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4183 return false;
4184
8644a673 4185 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4186 return false;
4187
4188 /* Is vectorizable load? */
4189 if (!is_gimple_assign (stmt))
4190 return false;
4191
4192 scalar_dest = gimple_assign_lhs (stmt);
4193 if (TREE_CODE (scalar_dest) != SSA_NAME)
4194 return false;
4195
4196 code = gimple_assign_rhs_code (stmt);
4197 if (code != ARRAY_REF
4198 && code != INDIRECT_REF
e9dbe7bb
IR
4199 && code != COMPONENT_REF
4200 && code != IMAGPART_EXPR
70f34814 4201 && code != REALPART_EXPR
42373e0b
RG
4202 && code != MEM_REF
4203 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
4204 return false;
4205
4206 if (!STMT_VINFO_DATA_REF (stmt_info))
4207 return false;
4208
a1e53f3f
L
4209 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4210 if (negative && ncopies > 1)
4211 {
4212 if (vect_print_dump_info (REPORT_DETAILS))
4213 fprintf (vect_dump, "multiple types with negative step.");
4214 return false;
4215 }
4216
7b7b1813 4217 elem_type = TREE_TYPE (vectype);
947131ba 4218 mode = TYPE_MODE (vectype);
ebfd146a
IR
4219
4220 /* FORNOW. In some cases can vectorize even if data-type not supported
4221 (e.g. - data copies). */
947131ba 4222 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a
IR
4223 {
4224 if (vect_print_dump_info (REPORT_DETAILS))
4225 fprintf (vect_dump, "Aligned load, but unsupported type.");
4226 return false;
4227 }
4228
ebfd146a
IR
4229 /* Check if the load is a part of an interleaving chain. */
4230 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4231 {
4232 strided_load = true;
4233 /* FORNOW */
aec7ae7d 4234 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 4235
e14c1050 4236 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
4237 if (!slp && !PURE_SLP_STMT (stmt_info))
4238 {
e14c1050 4239 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
4240 if (vect_load_lanes_supported (vectype, group_size))
4241 load_lanes_p = true;
4242 else if (!vect_strided_load_supported (vectype, group_size))
b602d918
RS
4243 return false;
4244 }
ebfd146a
IR
4245 }
4246
a1e53f3f
L
4247 if (negative)
4248 {
aec7ae7d 4249 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
a1e53f3f
L
4250 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4251 if (alignment_support_scheme != dr_aligned
4252 && alignment_support_scheme != dr_unaligned_supported)
4253 {
4254 if (vect_print_dump_info (REPORT_DETAILS))
4255 fprintf (vect_dump, "negative step but alignment required.");
4256 return false;
4257 }
2635892a 4258 if (!perm_mask_for_reverse (vectype))
a1e53f3f
L
4259 {
4260 if (vect_print_dump_info (REPORT_DETAILS))
4261 fprintf (vect_dump, "negative step and reversing not supported.");
4262 return false;
4263 }
4264 }
4265
aec7ae7d
JJ
4266 if (STMT_VINFO_GATHER_P (stmt_info))
4267 {
4268 gimple def_stmt;
4269 tree def;
4270 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4271 &gather_off, &gather_scale);
4272 gcc_assert (gather_decl);
4273 if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
4274 &def_stmt, &def, &gather_dt,
4275 &gather_off_vectype))
4276 {
4277 if (vect_print_dump_info (REPORT_DETAILS))
4278 fprintf (vect_dump, "gather index use not simple.");
4279 return false;
4280 }
4281 }
4282
ebfd146a
IR
4283 if (!vec_stmt) /* transformation not required. */
4284 {
4285 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
272c6793 4286 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
ebfd146a
IR
4287 return true;
4288 }
4289
4290 if (vect_print_dump_info (REPORT_DETAILS))
0ea25ecd 4291 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
ebfd146a
IR
4292
4293 /** Transform. **/
4294
aec7ae7d
JJ
4295 if (STMT_VINFO_GATHER_P (stmt_info))
4296 {
4297 tree vec_oprnd0 = NULL_TREE, op;
4298 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4299 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4300 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4301 edge pe = loop_preheader_edge (loop);
4302 gimple_seq seq;
4303 basic_block new_bb;
4304 enum { NARROW, NONE, WIDEN } modifier;
4305 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4306
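/* The gather builtin's offset vector need not have the same number of
   lanes as the data vector: NONE means they match; WIDEN means the offset
   vector is twice as wide, so only half of it is consumed per gather and
   odd copies permute its upper half into place; NARROW means it is half
   as wide, so two gathers are combined per data vector and NCOPIES is
   doubled below.  */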
4307 if (nunits == gather_off_nunits)
4308 modifier = NONE;
4309 else if (nunits == gather_off_nunits / 2)
4310 {
4311 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4312 modifier = WIDEN;
4313
4314 for (i = 0; i < gather_off_nunits; ++i)
4315 sel[i] = i | nunits;
4316
b826bea7 4317 perm_mask = gen_perm_mask (gather_off_vectype, sel);
aec7ae7d
JJ
4318 gcc_assert (perm_mask != NULL_TREE);
4319 }
4320 else if (nunits == gather_off_nunits * 2)
4321 {
4322 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4323 modifier = NARROW;
4324
4325 for (i = 0; i < nunits; ++i)
4326 sel[i] = i < gather_off_nunits
4327 ? i : i + nunits - gather_off_nunits;
4328
b826bea7 4329 perm_mask = gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4330 gcc_assert (perm_mask != NULL_TREE);
4331 ncopies *= 2;
4332 }
4333 else
4334 gcc_unreachable ();
4335
4336 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4337 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4338 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4339 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4340 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4341 scaletype = TREE_VALUE (arglist);
4342 gcc_checking_assert (types_compatible_p (srctype, rettype)
4343 && types_compatible_p (srctype, masktype));
4344
4345 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4346
4347 ptr = fold_convert (ptrtype, gather_base);
4348 if (!is_gimple_min_invariant (ptr))
4349 {
4350 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4351 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4352 gcc_assert (!new_bb);
4353 }
4354
4355 /* Currently we support only unconditional gather loads,
4356 so mask should be all ones. */
4357 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4358 mask = build_int_cst (TREE_TYPE (masktype), -1);
4359 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4360 {
4361 REAL_VALUE_TYPE r;
4362 long tmp[6];
4363 for (j = 0; j < 6; ++j)
4364 tmp[j] = -1;
4365 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4366 mask = build_real (TREE_TYPE (masktype), r);
4367 }
4368 else
4369 gcc_unreachable ();
4370 mask = build_vector_from_val (masktype, mask);
4371 mask = vect_init_vector (stmt, mask, masktype, NULL);
4372
4373 scale = build_int_cst (scaletype, gather_scale);
4374
4375 prev_stmt_info = NULL;
4376 for (j = 0; j < ncopies; ++j)
4377 {
4378 if (modifier == WIDEN && (j & 1))
4379 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4380 perm_mask, stmt, gsi);
4381 else if (j == 0)
4382 op = vec_oprnd0
4383 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4384 else
4385 op = vec_oprnd0
4386 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4387
4388 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4389 {
4390 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4391 == TYPE_VECTOR_SUBPARTS (idxtype));
4392 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4393 add_referenced_var (var);
4394 var = make_ssa_name (var, NULL);
4395 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4396 new_stmt
4397 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4398 op, NULL_TREE);
4399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4400 op = var;
4401 }
4402
4403 new_stmt
4404 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4405
4406 if (!useless_type_conversion_p (vectype, rettype))
4407 {
4408 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4409 == TYPE_VECTOR_SUBPARTS (rettype));
4410 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4411 add_referenced_var (var);
4412 op = make_ssa_name (var, new_stmt);
4413 gimple_call_set_lhs (new_stmt, op);
4414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4415 var = make_ssa_name (vec_dest, NULL);
4416 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4417 new_stmt
4418 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4419 NULL_TREE);
4420 }
4421 else
4422 {
4423 var = make_ssa_name (vec_dest, new_stmt);
4424 gimple_call_set_lhs (new_stmt, var);
4425 }
4426
4427 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4428
4429 if (modifier == NARROW)
4430 {
4431 if ((j & 1) == 0)
4432 {
4433 prev_res = var;
4434 continue;
4435 }
4436 var = permute_vec_elements (prev_res, var,
4437 perm_mask, stmt, gsi);
4438 new_stmt = SSA_NAME_DEF_STMT (var);
4439 }
4440
4441 if (prev_stmt_info == NULL)
4442 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4443 else
4444 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4445 prev_stmt_info = vinfo_for_stmt (new_stmt);
4446 }
4447 return true;
4448 }
4449
ebfd146a
IR
4450 if (strided_load)
4451 {
e14c1050 4452 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4
IR
4453 if (slp
4454 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4455 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4456 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4457
ebfd146a
IR
4458 /* Check if the chain of loads is already vectorized. */
4459 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4460 {
4461 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4462 return true;
4463 }
4464 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4465 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
4466
4467 /* VEC_NUM is the number of vect stmts to be created for this group. */
4468 if (slp)
4469 {
4470 strided_load = false;
4471 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
a70d6342
IR
4472 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4473 slp_perm = true;
4474 }
ebfd146a
IR
4475 else
4476 vec_num = group_size;
ebfd146a
IR
4477 }
4478 else
4479 {
4480 first_stmt = stmt;
4481 first_dr = dr;
4482 group_size = vec_num = 1;
4483 }
4484
720f5239 4485 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4486 gcc_assert (alignment_support_scheme);
272c6793
RS
4487 /* Targets with load-lane instructions must not require explicit
4488 realignment. */
4489 gcc_assert (!load_lanes_p
4490 || alignment_support_scheme == dr_aligned
4491 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
4492
4493 /* In case the vectorization factor (VF) is bigger than the number
4494 of elements that we can fit in a vectype (nunits), we have to generate
4495 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 4496 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4497 from one copy of the vector stmt to the next, in the field
ff802fa1 4498 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4499 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
4500 stmts that use the defs of the current stmt. The example below
4501 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4502 need to create 4 vectorized stmts):
ebfd146a
IR
4503
4504 before vectorization:
4505 RELATED_STMT VEC_STMT
4506 S1: x = memref - -
4507 S2: z = x + 1 - -
4508
4509 step 1: vectorize stmt S1:
4510 We first create the vector stmt VS1_0, and, as usual, record a
4511 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4512 Next, we create the vector stmt VS1_1, and record a pointer to
4513 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4514 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
4515 stmts and pointers:
4516 RELATED_STMT VEC_STMT
4517 VS1_0: vx0 = memref0 VS1_1 -
4518 VS1_1: vx1 = memref1 VS1_2 -
4519 VS1_2: vx2 = memref2 VS1_3 -
4520 VS1_3: vx3 = memref3 - -
4521 S1: x = load - VS1_0
4522 S2: z = x + 1 - -
4523
b8698a0f
L
4524 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4525 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
4526 stmt S2. */
4527
4528 /* In case of interleaving (non-unit strided access):
4529
4530 S1: x2 = &base + 2
4531 S2: x0 = &base
4532 S3: x1 = &base + 1
4533 S4: x3 = &base + 3
4534
b8698a0f 4535 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4536 starting from the access of the first stmt of the chain:
4537
4538 VS1: vx0 = &base
4539 VS2: vx1 = &base + vec_size*1
4540 VS3: vx3 = &base + vec_size*2
4541 VS4: vx4 = &base + vec_size*3
4542
4543 Then permutation statements are generated:
4544
4545 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4546 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4547 ...
4548
4549 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4550 (the order of the data-refs in the output of vect_permute_load_chain
4551 corresponds to the order of scalar stmts in the interleaving chain - see
4552 the documentation of vect_permute_load_chain()).
4553 The generation of permutation stmts and recording them in
4554 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4555
b8698a0f 4556 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
4557 permutation stmts above are created for every copy. The result vector
4558 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4559 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
4560
4561 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4562 on a target that supports unaligned accesses (dr_unaligned_supported)
4563 we generate the following code:
4564 p = initial_addr;
4565 indx = 0;
4566 loop {
4567 p = p + indx * vectype_size;
4568 vec_dest = *(p);
4569 indx = indx + 1;
4570 }
4571
4572 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4573 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
4574 then generate the following code, in which the data in each iteration is
4575 obtained by two vector loads, one from the previous iteration, and one
4576 from the current iteration:
4577 p1 = initial_addr;
4578 msq_init = *(floor(p1))
4579 p2 = initial_addr + VS - 1;
4580 realignment_token = call target_builtin;
4581 indx = 0;
4582 loop {
4583 p2 = p2 + indx * vectype_size
4584 lsq = *(floor(p2))
4585 vec_dest = realign_load (msq, lsq, realignment_token)
4586 indx = indx + 1;
4587 msq = lsq;
4588 } */
4589
4590 /* If the misalignment remains the same throughout the execution of the
4591 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 4592 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
4593 This can only occur when vectorizing memory accesses in the inner-loop
4594 nested within an outer-loop that is being vectorized. */
4595
a70d6342 4596 if (loop && nested_in_vect_loop_p (loop, stmt)
ebfd146a
IR
4597 && (TREE_INT_CST_LOW (DR_STEP (dr))
4598 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4599 {
4600 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4601 compute_in_loop = true;
4602 }
4603
4604 if ((alignment_support_scheme == dr_explicit_realign_optimized
4605 || alignment_support_scheme == dr_explicit_realign)
4606 && !compute_in_loop)
4607 {
4608 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4609 alignment_support_scheme, NULL_TREE,
4610 &at_loop);
4611 if (alignment_support_scheme == dr_explicit_realign_optimized)
4612 {
4613 phi = SSA_NAME_DEF_STMT (msq);
4614 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4615 }
4616 }
4617 else
4618 at_loop = loop;
4619
a1e53f3f
L
4620 if (negative)
4621 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4622
272c6793
RS
4623 if (load_lanes_p)
4624 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4625 else
4626 aggr_type = vectype;
4627
ebfd146a
IR
4628 prev_stmt_info = NULL;
4629 for (j = 0; j < ncopies; j++)
b8698a0f 4630 {
272c6793 4631 /* 1. Create the vector or array pointer update chain. */
ebfd146a 4632 if (j == 0)
272c6793 4633 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
920e8172
RS
4634 offset, &dummy, gsi,
4635 &ptr_incr, false, &inv_p);
ebfd146a 4636 else
272c6793
RS
4637 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4638 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 4639
5ce1ee7f
RS
4640 if (strided_load || slp_perm)
4641 dr_chain = VEC_alloc (tree, heap, vec_num);
4642
272c6793 4643 if (load_lanes_p)
ebfd146a 4644 {
272c6793
RS
4645 tree vec_array;
4646
4647 vec_array = create_vector_array (vectype, vec_num);
4648
4649 /* Emit:
4650 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4651 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4652 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4653 gimple_call_set_lhs (new_stmt, vec_array);
4654 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4655 mark_symbols_for_renaming (new_stmt);
ebfd146a 4656
272c6793
RS
4657 /* Extract each vector into an SSA_NAME. */
4658 for (i = 0; i < vec_num; i++)
ebfd146a 4659 {
272c6793
RS
4660 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4661 vec_array, i);
4662 VEC_quick_push (tree, dr_chain, new_temp);
4663 }
4664
4665 /* Record the mapping between SSA_NAMEs and statements. */
4666 vect_record_strided_load_vectors (stmt, dr_chain);
4667 }
4668 else
4669 {
4670 for (i = 0; i < vec_num; i++)
4671 {
4672 if (i > 0)
4673 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4674 stmt, NULL_TREE);
4675
4676 /* 2. Create the vector-load in the loop. */
4677 switch (alignment_support_scheme)
4678 {
4679 case dr_aligned:
4680 case dr_unaligned_supported:
be1ac4ec 4681 {
272c6793
RS
4682 struct ptr_info_def *pi;
4683 data_ref
4684 = build2 (MEM_REF, vectype, dataref_ptr,
4685 build_int_cst (reference_alias_ptr_type
4686 (DR_REF (first_dr)), 0));
4687 pi = get_ptr_info (dataref_ptr);
4688 pi->align = TYPE_ALIGN_UNIT (vectype);
4689 if (alignment_support_scheme == dr_aligned)
4690 {
4691 gcc_assert (aligned_access_p (first_dr));
4692 pi->misalign = 0;
4693 }
4694 else if (DR_MISALIGNMENT (first_dr) == -1)
4695 {
4696 TREE_TYPE (data_ref)
4697 = build_aligned_type (TREE_TYPE (data_ref),
4698 TYPE_ALIGN (elem_type));
4699 pi->align = TYPE_ALIGN_UNIT (elem_type);
4700 pi->misalign = 0;
4701 }
4702 else
4703 {
4704 TREE_TYPE (data_ref)
4705 = build_aligned_type (TREE_TYPE (data_ref),
4706 TYPE_ALIGN (elem_type));
4707 pi->misalign = DR_MISALIGNMENT (first_dr);
4708 }
4709 break;
be1ac4ec 4710 }
272c6793 4711 case dr_explicit_realign:
267d3070 4712 {
272c6793
RS
4713 tree ptr, bump;
4714 tree vs_minus_1;
4715
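/* Explicit realignment: emit the first aligned load (MSQ) from the
   pointer masked down to the vector alignment boundary, then bump the
   pointer by VS - 1 elements and mask again to form the address of the
   second aligned load (LSQ); the two are combined by the REALIGN_LOAD
   generated after this switch.  */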
4716 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4717
4718 if (compute_in_loop)
4719 msq = vect_setup_realignment (first_stmt, gsi,
4720 &realignment_token,
4721 dr_explicit_realign,
4722 dataref_ptr, NULL);
4723
4724 new_stmt = gimple_build_assign_with_ops
4725 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4726 build_int_cst
4727 (TREE_TYPE (dataref_ptr),
4728 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4729 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4730 gimple_assign_set_lhs (new_stmt, ptr);
4731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4732 data_ref
4733 = build2 (MEM_REF, vectype, ptr,
4734 build_int_cst (reference_alias_ptr_type
4735 (DR_REF (first_dr)), 0));
4736 vec_dest = vect_create_destination_var (scalar_dest,
4737 vectype);
4738 new_stmt = gimple_build_assign (vec_dest, data_ref);
4739 new_temp = make_ssa_name (vec_dest, new_stmt);
4740 gimple_assign_set_lhs (new_stmt, new_temp);
4741 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4742 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4743 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4744 msq = new_temp;
4745
4746 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 4747 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
4748 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4749 new_stmt = gimple_build_assign_with_ops
4750 (BIT_AND_EXPR, NULL_TREE, ptr,
4751 build_int_cst
4752 (TREE_TYPE (ptr),
4753 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4754 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4755 gimple_assign_set_lhs (new_stmt, ptr);
4756 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4757 data_ref
4758 = build2 (MEM_REF, vectype, ptr,
4759 build_int_cst (reference_alias_ptr_type
4760 (DR_REF (first_dr)), 0));
4761 break;
267d3070 4762 }
272c6793
RS
4763 case dr_explicit_realign_optimized:
4764 new_stmt = gimple_build_assign_with_ops
4765 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4766 build_int_cst
4767 (TREE_TYPE (dataref_ptr),
4768 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4769 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4770 new_stmt);
4771 gimple_assign_set_lhs (new_stmt, new_temp);
4772 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4773 data_ref
4774 = build2 (MEM_REF, vectype, new_temp,
4775 build_int_cst (reference_alias_ptr_type
4776 (DR_REF (first_dr)), 0));
4777 break;
4778 default:
4779 gcc_unreachable ();
4780 }
ebfd146a 4781 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 4782 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
4783 new_temp = make_ssa_name (vec_dest, new_stmt);
4784 gimple_assign_set_lhs (new_stmt, new_temp);
4785 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793 4786 mark_symbols_for_renaming (new_stmt);
ebfd146a 4787
272c6793
RS
4788 /* 3. Handle explicit realignment if necessary/supported.
4789 Create in loop:
4790 vec_dest = realign_load (msq, lsq, realignment_token) */
4791 if (alignment_support_scheme == dr_explicit_realign_optimized
4792 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 4793 {
272c6793
RS
4794 lsq = gimple_assign_lhs (new_stmt);
4795 if (!realignment_token)
4796 realignment_token = dataref_ptr;
4797 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4798 new_stmt
4799 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4800 vec_dest, msq, lsq,
4801 realignment_token);
4802 new_temp = make_ssa_name (vec_dest, new_stmt);
4803 gimple_assign_set_lhs (new_stmt, new_temp);
4804 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4805
4806 if (alignment_support_scheme == dr_explicit_realign_optimized)
4807 {
4808 gcc_assert (phi);
4809 if (i == vec_num - 1 && j == ncopies - 1)
4810 add_phi_arg (phi, lsq,
4811 loop_latch_edge (containing_loop),
4812 UNKNOWN_LOCATION);
4813 msq = lsq;
4814 }
ebfd146a 4815 }
ebfd146a 4816
272c6793
RS
4817 /* 4. Handle invariant-load. */
4818 if (inv_p && !bb_vinfo)
ebfd146a 4819 {
98f4fb34 4820 tree tem, vec_inv;
ab70d825 4821 gimple_stmt_iterator gsi2 = *gsi;
272c6793 4822 gcc_assert (!strided_load);
ab70d825 4823 gsi_next (&gsi2);
98f4fb34
RG
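/* Splat the invariant scalar: convert it to the vector element type if
   necessary and broadcast it with build_vector_from_val, emitting the
   vector initialization at GSI2, just past the scalar stmt.  */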
4824 tem = scalar_dest;
4825 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4826 TREE_TYPE (tem)))
4827 {
4828 tem = fold_convert (TREE_TYPE (vectype), tem);
4829 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4830 NULL_TREE, true,
4831 GSI_SAME_STMT);
4832 }
4833 vec_inv = build_vector_from_val (vectype, tem);
ab70d825
RG
4834 new_temp = vect_init_vector (stmt, vec_inv,
4835 vectype, &gsi2);
4836 new_stmt = SSA_NAME_DEF_STMT (new_temp);
272c6793 4837 }
ebfd146a 4838
272c6793
RS
4839 if (negative)
4840 {
aec7ae7d
JJ
4841 tree perm_mask = perm_mask_for_reverse (vectype);
4842 new_temp = permute_vec_elements (new_temp, new_temp,
4843 perm_mask, stmt, gsi);
ebfd146a
IR
4844 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4845 }
267d3070 4846
272c6793
RS
4847 /* Collect vector loads and later create their permutation in
4848 vect_transform_strided_load (). */
4849 if (strided_load || slp_perm)
4850 VEC_quick_push (tree, dr_chain, new_temp);
267d3070 4851
272c6793
RS
4852 /* Store vector loads in the corresponding SLP_NODE. */
4853 if (slp && !slp_perm)
4854 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4855 new_stmt);
4856 }
ebfd146a
IR
4857 }
4858
4859 if (slp && !slp_perm)
4860 continue;
4861
4862 if (slp_perm)
4863 {
a70d6342 4864 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
ebfd146a
IR
4865 slp_node_instance, false))
4866 {
4867 VEC_free (tree, heap, dr_chain);
4868 return false;
4869 }
4870 }
4871 else
4872 {
4873 if (strided_load)
4874 {
272c6793
RS
4875 if (!load_lanes_p)
4876 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
ebfd146a 4877 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4878 }
4879 else
4880 {
4881 if (j == 0)
4882 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4883 else
4884 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4885 prev_stmt_info = vinfo_for_stmt (new_stmt);
4886 }
4887 }
5ce1ee7f
RS
4888 if (dr_chain)
4889 VEC_free (tree, heap, dr_chain);
ebfd146a
IR
4890 }
4891
ebfd146a
IR
4892 return true;
4893}
4894
4895/* Function vect_is_simple_cond.
b8698a0f 4896
ebfd146a
IR
4897 Input:
4898 LOOP - the loop that is being vectorized.
4899 COND - Condition that is checked for simple use.
4900
e9e1d143
RG
4901 Output:
4902 *COMP_VECTYPE - the vector type for the comparison.
4903
ebfd146a
IR
4904 Returns whether a COND can be vectorized. Checks whether
4905 condition operands are supportable using vect_is_simple_use. */
4906
87aab9b2 4907static bool
f7e531cf
IR
4908vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4909 tree *comp_vectype)
ebfd146a
IR
4910{
4911 tree lhs, rhs;
4912 tree def;
4913 enum vect_def_type dt;
e9e1d143 4914 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
4915
4916 if (!COMPARISON_CLASS_P (cond))
4917 return false;
4918
4919 lhs = TREE_OPERAND (cond, 0);
4920 rhs = TREE_OPERAND (cond, 1);
4921
4922 if (TREE_CODE (lhs) == SSA_NAME)
4923 {
4924 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
f7e531cf 4925 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
e9e1d143 4926 &dt, &vectype1))
ebfd146a
IR
4927 return false;
4928 }
4929 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4930 && TREE_CODE (lhs) != FIXED_CST)
4931 return false;
4932
4933 if (TREE_CODE (rhs) == SSA_NAME)
4934 {
4935 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
f7e531cf 4936 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
e9e1d143 4937 &dt, &vectype2))
ebfd146a
IR
4938 return false;
4939 }
f7e531cf 4940 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
4941 && TREE_CODE (rhs) != FIXED_CST)
4942 return false;
4943
e9e1d143 4944 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
4945 return true;
4946}
4947
4948/* vectorizable_condition.
4949
b8698a0f
L
 4950 Check if STMT is a conditional modify expression that can be vectorized.
4951 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4952 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
4953 at GSI.
4954
 4955 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 4956 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 4957 the else clause if it is 2).
ebfd146a
IR
4958
4959 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
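/* Illustrative example (a sketch; the SSA names are hypothetical):
 a scalar statement

 S: c_1 = a_2 < b_3 ? x_4 : y_5;

 is replaced by a vector statement of the form

 VS: vc = VEC_COND_EXPR <va < vb, vx, vy>;

 where va, vb, vx and vy are the vector defs of the scalar operands,
 provided the target supports the VEC_COND_EXPR for the involved
 vector types (checked with expand_vec_cond_expr_p during analysis). */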
4960
4bbe8262 4961bool
ebfd146a 4962vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
4963 gimple *vec_stmt, tree reduc_def, int reduc_index,
4964 slp_tree slp_node)
ebfd146a
IR
4965{
4966 tree scalar_dest = NULL_TREE;
4967 tree vec_dest = NULL_TREE;
ebfd146a
IR
4968 tree cond_expr, then_clause, else_clause;
4969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 4971 tree comp_vectype = NULL_TREE;
ff802fa1
IR
4972 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4973 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
4974 tree vec_compare, vec_cond_expr;
4975 tree new_temp;
4976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 4977 tree def;
a855b1b1 4978 enum vect_def_type dt, dts[4];
ebfd146a 4979 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 4980 int ncopies;
ebfd146a 4981 enum tree_code code;
a855b1b1 4982 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
4983 int i, j;
4984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4985 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4986 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
b8698a0f 4987
f7e531cf
IR
4988 if (slp_node || PURE_SLP_STMT (stmt_info))
4989 ncopies = 1;
4990 else
4991 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 4992
ebfd146a 4993 gcc_assert (ncopies >= 1);
a855b1b1 4994 if (reduc_index && ncopies > 1)
ebfd146a
IR
4995 return false; /* FORNOW */
4996
f7e531cf
IR
4997 if (reduc_index && STMT_SLP_TYPE (stmt_info))
4998 return false;
4999
5000 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5001 return false;
5002
4bbe8262
IR
5003 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5004 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5005 && reduc_def))
ebfd146a
IR
5006 return false;
5007
ebfd146a 5008 /* FORNOW: not yet supported. */
b8698a0f 5009 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a
IR
5010 {
5011 if (vect_print_dump_info (REPORT_DETAILS))
5012 fprintf (vect_dump, "value used after loop.");
5013 return false;
5014 }
5015
 5016 /* Is this a vectorizable conditional operation? */
5017 if (!is_gimple_assign (stmt))
5018 return false;
5019
5020 code = gimple_assign_rhs_code (stmt);
5021
5022 if (code != COND_EXPR)
5023 return false;
5024
4e71066d
RG
5025 cond_expr = gimple_assign_rhs1 (stmt);
5026 then_clause = gimple_assign_rhs2 (stmt);
5027 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5028
f7e531cf 5029 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
e9e1d143 5030 || !comp_vectype)
ebfd146a
IR
5031 return false;
5032
5033 if (TREE_CODE (then_clause) == SSA_NAME)
5034 {
5035 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
f7e531cf 5036 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
ebfd146a
IR
5037 &then_def_stmt, &def, &dt))
5038 return false;
5039 }
b8698a0f 5040 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
5041 && TREE_CODE (then_clause) != REAL_CST
5042 && TREE_CODE (then_clause) != FIXED_CST)
5043 return false;
5044
5045 if (TREE_CODE (else_clause) == SSA_NAME)
5046 {
5047 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
f7e531cf 5048 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
ebfd146a
IR
5049 &else_def_stmt, &def, &dt))
5050 return false;
5051 }
b8698a0f 5052 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
5053 && TREE_CODE (else_clause) != REAL_CST
5054 && TREE_CODE (else_clause) != FIXED_CST)
5055 return false;
5056
b8698a0f 5057 if (!vec_stmt)
ebfd146a
IR
5058 {
5059 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5060 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5061 }
5062
f7e531cf
IR
5063 /* Transform. */
5064
5065 if (!slp_node)
5066 {
5067 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5068 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5069 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5070 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5071 }
ebfd146a
IR
5072
5073 /* Handle def. */
5074 scalar_dest = gimple_assign_lhs (stmt);
5075 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5076
5077 /* Handle cond expr. */
a855b1b1
MM
5078 for (j = 0; j < ncopies; j++)
5079 {
f7e531cf 5080 gimple new_stmt = NULL;
a855b1b1
MM
5081 if (j == 0)
5082 {
f7e531cf
IR
5083 if (slp_node)
5084 {
5085 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5086 VEC (slp_void_p, heap) *vec_defs;
5087
5088 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5089 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5090 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5091 VEC_safe_push (tree, heap, ops, then_clause);
5092 VEC_safe_push (tree, heap, ops, else_clause);
5093 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5094 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5095 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5096 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5097 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5098
5099 VEC_free (tree, heap, ops);
5100 VEC_free (slp_void_p, heap, vec_defs);
5101 }
5102 else
5103 {
5104 gimple gtemp;
5105 vec_cond_lhs =
a855b1b1
MM
5106 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5107 stmt, NULL);
f7e531cf 5108 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
a855b1b1 5109 NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
5110
5111 vec_cond_rhs =
5112 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5113 stmt, NULL);
5114 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5115 NULL, &gtemp, &def, &dts[1]);
5116 if (reduc_index == 1)
5117 vec_then_clause = reduc_def;
5118 else
5119 {
5120 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5121 stmt, NULL);
5122 vect_is_simple_use (then_clause, loop_vinfo,
5123 NULL, &gtemp, &def, &dts[2]);
5124 }
5125 if (reduc_index == 2)
5126 vec_else_clause = reduc_def;
5127 else
5128 {
5129 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5130 stmt, NULL);
f7e531cf 5131 vect_is_simple_use (else_clause, loop_vinfo,
a855b1b1 5132 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5133 }
a855b1b1
MM
5134 }
5135 }
5136 else
5137 {
f7e531cf
IR
5138 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5139 VEC_pop (tree, vec_oprnds0));
5140 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5141 VEC_pop (tree, vec_oprnds1));
a855b1b1 5142 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
f7e531cf 5143 VEC_pop (tree, vec_oprnds2));
a855b1b1 5144 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
f7e531cf
IR
5145 VEC_pop (tree, vec_oprnds3));
5146 }
5147
5148 if (!slp_node)
5149 {
5150 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5151 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5152 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5153 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
a855b1b1
MM
5154 }
5155
9dc3f7de 5156 /* Arguments are ready. Create the new vector stmt. */
f7e531cf
IR
5157 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5158 {
5159 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5160 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5161 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
a855b1b1 5162
f7e531cf
IR
5163 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5164 vec_cond_lhs, vec_cond_rhs);
5165 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5166 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5167
f7e531cf
IR
5168 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5169 new_temp = make_ssa_name (vec_dest, new_stmt);
5170 gimple_assign_set_lhs (new_stmt, new_temp);
5171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5172 if (slp_node)
5173 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5174 }
5175
5176 if (slp_node)
5177 continue;
5178
5179 if (j == 0)
5180 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5181 else
5182 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5183
5184 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5185 }
b8698a0f 5186
f7e531cf
IR
5187 VEC_free (tree, heap, vec_oprnds0);
5188 VEC_free (tree, heap, vec_oprnds1);
5189 VEC_free (tree, heap, vec_oprnds2);
5190 VEC_free (tree, heap, vec_oprnds3);
5191
ebfd146a
IR
5192 return true;
5193}
5194
5195
8644a673 5196/* Make sure the statement is vectorizable. */
ebfd146a
IR
5197
5198bool
a70d6342 5199vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5200{
8644a673 5201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5202 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5203 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5204 bool ok;
a70d6342 5205 tree scalar_type, vectype;
363477c0
JJ
5206 gimple pattern_stmt;
5207 gimple_seq pattern_def_seq;
ebfd146a
IR
5208
5209 if (vect_print_dump_info (REPORT_DETAILS))
ebfd146a 5210 {
8644a673
IR
5211 fprintf (vect_dump, "==> examining statement: ");
5212 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5213 }
ebfd146a 5214
1825a1f3 5215 if (gimple_has_volatile_ops (stmt))
b8698a0f 5216 {
1825a1f3
IR
5217 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5218 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5219
5220 return false;
5221 }
b8698a0f
L
5222
5223 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
5224 to include:
5225 - the COND_EXPR which is the loop exit condition
5226 - any LABEL_EXPRs in the loop
b8698a0f 5227 - computations that are used only for array indexing or loop control.
8644a673 5228 In basic blocks we only analyze statements that are part of some SLP
83197f37 5229 instance; therefore, all the statements are relevant.
ebfd146a 5230
d092494c 5231 A pattern statement needs to be analyzed instead of the original statement
83197f37
IR
5232 if the original statement is not relevant. Otherwise, we analyze both
5233 statements. */
5234
5235 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5236 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5237 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5238 {
9d5e7640 5239 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5240 && pattern_stmt
9d5e7640
IR
5241 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5242 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5243 {
83197f37 5244 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
5245 stmt = pattern_stmt;
5246 stmt_info = vinfo_for_stmt (pattern_stmt);
5247 if (vect_print_dump_info (REPORT_DETAILS))
5248 {
5249 fprintf (vect_dump, "==> examining pattern statement: ");
5250 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5251 }
5252 }
5253 else
5254 {
5255 if (vect_print_dump_info (REPORT_DETAILS))
5256 fprintf (vect_dump, "irrelevant.");
ebfd146a 5257
9d5e7640
IR
5258 return true;
5259 }
8644a673 5260 }
83197f37
IR
5261 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5262 && pattern_stmt
5263 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5264 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5265 {
5266 /* Analyze PATTERN_STMT too. */
5267 if (vect_print_dump_info (REPORT_DETAILS))
5268 {
5269 fprintf (vect_dump, "==> examining pattern statement: ");
 5270 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
5271 }
5272
5273 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5274 return false;
5275 }
ebfd146a 5276
1107f3ae 5277 if (is_pattern_stmt_p (stmt_info)
363477c0 5278 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5279 {
363477c0 5280 gimple_stmt_iterator si;
1107f3ae 5281
363477c0
JJ
5282 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5283 {
5284 gimple pattern_def_stmt = gsi_stmt (si);
5285 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5286 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5287 {
5288 /* Analyze def stmt of STMT if it's a pattern stmt. */
5289 if (vect_print_dump_info (REPORT_DETAILS))
5290 {
5291 fprintf (vect_dump, "==> examining pattern def statement: ");
5292 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5293 }
1107f3ae 5294
363477c0
JJ
5295 if (!vect_analyze_stmt (pattern_def_stmt,
5296 need_to_vectorize, node))
5297 return false;
5298 }
5299 }
5300 }
1107f3ae 5301
8644a673
IR
5302 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5303 {
5304 case vect_internal_def:
5305 break;
ebfd146a 5306
8644a673 5307 case vect_reduction_def:
7c5222ff 5308 case vect_nested_cycle:
a70d6342 5309 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5310 || relevance == vect_used_in_outer_by_reduction
a70d6342 5311 || relevance == vect_unused_in_scope));
8644a673
IR
5312 break;
5313
5314 case vect_induction_def:
5315 case vect_constant_def:
5316 case vect_external_def:
5317 case vect_unknown_def_type:
5318 default:
5319 gcc_unreachable ();
5320 }
ebfd146a 5321
a70d6342
IR
5322 if (bb_vinfo)
5323 {
5324 gcc_assert (PURE_SLP_STMT (stmt_info));
5325
b690cc0f 5326 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
a70d6342
IR
5327 if (vect_print_dump_info (REPORT_DETAILS))
5328 {
5329 fprintf (vect_dump, "get vectype for scalar type: ");
5330 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5331 }
5332
5333 vectype = get_vectype_for_scalar_type (scalar_type);
5334 if (!vectype)
5335 {
5336 if (vect_print_dump_info (REPORT_DETAILS))
5337 {
5338 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5339 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5340 }
5341 return false;
5342 }
5343
5344 if (vect_print_dump_info (REPORT_DETAILS))
5345 {
5346 fprintf (vect_dump, "vectype: ");
5347 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5348 }
5349
5350 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5351 }
5352
8644a673 5353 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5354 {
8644a673
IR
5355 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5356 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5357 *need_to_vectorize = true;
ebfd146a
IR
5358 }
5359
8644a673 5360 ok = true;
b8698a0f 5361 if (!bb_vinfo
a70d6342
IR
5362 && (STMT_VINFO_RELEVANT_P (stmt_info)
5363 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5364 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5365 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
5366 || vectorizable_operation (stmt, NULL, NULL, NULL)
5367 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5368 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5369 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5370 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5371 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5372 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
5373 else
5374 {
5375 if (bb_vinfo)
4a00c761
JJ
5376 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5377 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5378 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
5379 || vectorizable_assignment (stmt, NULL, NULL, node)
5380 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5381 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
5382 || vectorizable_store (stmt, NULL, NULL, node)
5383 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5384 }
8644a673
IR
5385
5386 if (!ok)
ebfd146a 5387 {
8644a673
IR
5388 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5389 {
5390 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5391 fprintf (vect_dump, "supported: ");
5392 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5393 }
b8698a0f 5394
ebfd146a
IR
5395 return false;
5396 }
5397
a70d6342
IR
5398 if (bb_vinfo)
5399 return true;
5400
8644a673
IR
 5401 /* Stmts that are (also) "live" (i.e., used outside the loop)
5402 need extra handling, except for vectorizable reductions. */
5403 if (STMT_VINFO_LIVE_P (stmt_info)
5404 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5405 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5406
8644a673 5407 if (!ok)
ebfd146a 5408 {
8644a673
IR
5409 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5410 {
5411 fprintf (vect_dump, "not vectorized: live stmt not ");
5412 fprintf (vect_dump, "supported: ");
5413 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5414 }
b8698a0f 5415
8644a673 5416 return false;
ebfd146a
IR
5417 }
5418
ebfd146a
IR
5419 return true;
5420}
5421
5422
5423/* Function vect_transform_stmt.
5424
5425 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5426
5427bool
5428vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
b8698a0f 5429 bool *strided_store, slp_tree slp_node,
ebfd146a
IR
5430 slp_instance slp_node_instance)
5431{
5432 bool is_store = false;
5433 gimple vec_stmt = NULL;
5434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5435 bool done;
ebfd146a
IR
5436
5437 switch (STMT_VINFO_TYPE (stmt_info))
5438 {
5439 case type_demotion_vec_info_type:
ebfd146a 5440 case type_promotion_vec_info_type:
ebfd146a
IR
5441 case type_conversion_vec_info_type:
5442 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5443 gcc_assert (done);
5444 break;
5445
5446 case induc_vec_info_type:
5447 gcc_assert (!slp_node);
5448 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5449 gcc_assert (done);
5450 break;
5451
9dc3f7de
IR
5452 case shift_vec_info_type:
5453 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5454 gcc_assert (done);
5455 break;
5456
ebfd146a
IR
5457 case op_vec_info_type:
5458 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5459 gcc_assert (done);
5460 break;
5461
5462 case assignment_vec_info_type:
5463 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5464 gcc_assert (done);
5465 break;
5466
5467 case load_vec_info_type:
b8698a0f 5468 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5469 slp_node_instance);
5470 gcc_assert (done);
5471 break;
5472
5473 case store_vec_info_type:
5474 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5475 gcc_assert (done);
5476 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5477 {
5478 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5479 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 5480 one are skipped, and their vec_stmt_info shouldn't be freed
5481 meanwhile. */
5482 *strided_store = true;
5483 if (STMT_VINFO_VEC_STMT (stmt_info))
5484 is_store = true;
5485 }
5486 else
5487 is_store = true;
5488 break;
5489
5490 case condition_vec_info_type:
f7e531cf 5491 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
5492 gcc_assert (done);
5493 break;
5494
5495 case call_vec_info_type:
190c2236 5496 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5497 stmt = gsi_stmt (*gsi);
ebfd146a
IR
5498 break;
5499
5500 case reduc_vec_info_type:
b5aeb3bb 5501 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
5502 gcc_assert (done);
5503 break;
5504
5505 default:
5506 if (!STMT_VINFO_LIVE_P (stmt_info))
5507 {
5508 if (vect_print_dump_info (REPORT_DETAILS))
5509 fprintf (vect_dump, "stmt not supported.");
5510 gcc_unreachable ();
5511 }
5512 }
5513
5514 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5515 is being vectorized, but outside the immediately enclosing loop. */
5516 if (vec_stmt
a70d6342
IR
5517 && STMT_VINFO_LOOP_VINFO (stmt_info)
5518 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5519 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
5520 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5521 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5522 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5523 vect_used_in_outer_by_reduction))
ebfd146a 5524 {
a70d6342
IR
5525 struct loop *innerloop = LOOP_VINFO_LOOP (
5526 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
5527 imm_use_iterator imm_iter;
5528 use_operand_p use_p;
5529 tree scalar_dest;
5530 gimple exit_phi;
5531
5532 if (vect_print_dump_info (REPORT_DETAILS))
a70d6342 5533 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
ebfd146a
IR
5534
 5535 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5536 (to be used when vectorizing outer-loop stmts that use the DEF of
5537 STMT). */
5538 if (gimple_code (stmt) == GIMPLE_PHI)
5539 scalar_dest = PHI_RESULT (stmt);
5540 else
5541 scalar_dest = gimple_assign_lhs (stmt);
5542
5543 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5544 {
5545 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5546 {
5547 exit_phi = USE_STMT (use_p);
5548 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5549 }
5550 }
5551 }
5552
5553 /* Handle stmts whose DEF is used outside the loop-nest that is
5554 being vectorized. */
5555 if (STMT_VINFO_LIVE_P (stmt_info)
5556 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5557 {
5558 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5559 gcc_assert (done);
5560 }
5561
5562 if (vec_stmt)
83197f37 5563 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 5564
b8698a0f 5565 return is_store;
ebfd146a
IR
5566}
5567
5568
b8698a0f 5569/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
5570 stmt_vec_info. */
5571
5572void
5573vect_remove_stores (gimple first_stmt)
5574{
5575 gimple next = first_stmt;
5576 gimple tmp;
5577 gimple_stmt_iterator next_si;
5578
5579 while (next)
5580 {
78048b1c
JJ
5581 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5582
5583 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5584 if (is_pattern_stmt_p (stmt_info))
5585 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
5586 /* Free the attached stmt_vec_info and remove the stmt. */
5587 next_si = gsi_for_stmt (next);
5588 gsi_remove (&next_si, true);
ebfd146a
IR
5589 free_stmt_vec_info (next);
5590 next = tmp;
5591 }
5592}
5593
5594
5595/* Function new_stmt_vec_info.
5596
5597 Create and initialize a new stmt_vec_info struct for STMT. */
5598
5599stmt_vec_info
b8698a0f 5600new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 5601 bb_vec_info bb_vinfo)
ebfd146a
IR
5602{
5603 stmt_vec_info res;
5604 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5605
5606 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5607 STMT_VINFO_STMT (res) = stmt;
5608 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 5609 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 5610 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
5611 STMT_VINFO_LIVE_P (res) = false;
5612 STMT_VINFO_VECTYPE (res) = NULL;
5613 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 5614 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
5615 STMT_VINFO_IN_PATTERN_P (res) = false;
5616 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 5617 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
5618 STMT_VINFO_DATA_REF (res) = NULL;
5619
5620 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5621 STMT_VINFO_DR_OFFSET (res) = NULL;
5622 STMT_VINFO_DR_INIT (res) = NULL;
5623 STMT_VINFO_DR_STEP (res) = NULL;
5624 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5625
5626 if (gimple_code (stmt) == GIMPLE_PHI
5627 && is_loop_header_bb_p (gimple_bb (stmt)))
5628 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5629 else
8644a673
IR
5630 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5631
ebfd146a
IR
5632 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5633 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5634 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
32e8bb8e 5635 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
5636 GROUP_FIRST_ELEMENT (res) = NULL;
5637 GROUP_NEXT_ELEMENT (res) = NULL;
5638 GROUP_SIZE (res) = 0;
5639 GROUP_STORE_COUNT (res) = 0;
5640 GROUP_GAP (res) = 0;
5641 GROUP_SAME_DR_STMT (res) = NULL;
5642 GROUP_READ_WRITE_DEPENDENCE (res) = false;
ebfd146a
IR
5643
5644 return res;
5645}
5646
5647
 5648/* Create a vector for holding stmt_vec_info structs. */
5649
5650void
5651init_stmt_vec_info_vec (void)
5652{
5653 gcc_assert (!stmt_vec_info_vec);
5654 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5655}
5656
5657
 5658/* Free the vector of stmt_vec_info structs. */
5659
5660void
5661free_stmt_vec_info_vec (void)
5662{
5663 gcc_assert (stmt_vec_info_vec);
5664 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5665}
5666
5667
5668/* Free stmt vectorization related info. */
5669
5670void
5671free_stmt_vec_info (gimple stmt)
5672{
5673 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5674
5675 if (!stmt_info)
5676 return;
5677
78048b1c
JJ
5678 /* Check if this statement has a related "pattern stmt"
5679 (introduced by the vectorizer during the pattern recognition
5680 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5681 too. */
5682 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5683 {
5684 stmt_vec_info patt_info
5685 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5686 if (patt_info)
5687 {
363477c0
JJ
5688 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5689 if (seq)
5690 {
5691 gimple_stmt_iterator si;
5692 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5693 free_stmt_vec_info (gsi_stmt (si));
5694 }
78048b1c
JJ
5695 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5696 }
5697 }
5698
ebfd146a
IR
5699 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5700 set_vinfo_for_stmt (stmt, NULL);
5701 free (stmt_info);
5702}
5703
5704
bb67d9c7 5705/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 5706
bb67d9c7 5707 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
5708 by the target. */
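/* Illustrative example (the target and modes are hypothetical): for
 SCALAR_TYPE == int (4 bytes) and SIZE == 16, a target with 128-bit
 vectors would yield a V4SI type with 4 units; with SIZE == 0 the
 mode returned by targetm.vectorize.preferred_simd_mode is used
 instead. */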
5709
bb67d9c7
RG
5710static tree
5711get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
5712{
5713 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 5714 enum machine_mode simd_mode;
2f816591 5715 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
5716 int nunits;
5717 tree vectype;
5718
cc4b5170 5719 if (nbytes == 0)
ebfd146a
IR
5720 return NULL_TREE;
5721
48f2e373
RB
5722 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5723 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5724 return NULL_TREE;
5725
2f816591
RG
5726 /* We can't build a vector type of elements with alignment bigger than
5727 their size. */
5728 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5729 return NULL_TREE;
5730
7b7b1813
RG
5731 /* For vector types of elements whose mode precision doesn't
 5732 match their type's precision we use an element type of mode
5733 precision. The vectorization routines will have to make sure
48f2e373
RB
5734 they support the proper result truncation/extension.
5735 We also make sure to build vector types with INTEGER_TYPE
5736 component type only. */
6d7971b8 5737 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
5738 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5739 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
5740 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5741 TYPE_UNSIGNED (scalar_type));
6d7971b8 5742
ccbf5bb4
RG
5743 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5744 When the component mode passes the above test simply use a type
5745 corresponding to that mode. The theory is that any use that
5746 would cause problems with this will disable vectorization anyway. */
5747 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5748 && !INTEGRAL_TYPE_P (scalar_type)
5749 && !POINTER_TYPE_P (scalar_type))
5750 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5751
bb67d9c7
RG
5752 /* If no size was supplied use the mode the target prefers. Otherwise
5753 lookup a vector mode of the specified size. */
5754 if (size == 0)
5755 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5756 else
5757 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
5758 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5759 if (nunits <= 1)
5760 return NULL_TREE;
ebfd146a
IR
5761
5762 vectype = build_vector_type (scalar_type, nunits);
5763 if (vect_print_dump_info (REPORT_DETAILS))
5764 {
5765 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5766 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5767 }
5768
5769 if (!vectype)
5770 return NULL_TREE;
5771
5772 if (vect_print_dump_info (REPORT_DETAILS))
5773 {
5774 fprintf (vect_dump, "vectype: ");
5775 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5776 }
5777
5778 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5779 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5780 {
5781 if (vect_print_dump_info (REPORT_DETAILS))
5782 fprintf (vect_dump, "mode not supported by target.");
5783 return NULL_TREE;
5784 }
5785
5786 return vectype;
5787}
5788
bb67d9c7
RG
5789unsigned int current_vector_size;
5790
5791/* Function get_vectype_for_scalar_type.
5792
5793 Returns the vector type corresponding to SCALAR_TYPE as supported
5794 by the target. */
5795
5796tree
5797get_vectype_for_scalar_type (tree scalar_type)
5798{
5799 tree vectype;
5800 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5801 current_vector_size);
5802 if (vectype
5803 && current_vector_size == 0)
5804 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5805 return vectype;
5806}
5807
b690cc0f
RG
5808/* Function get_same_sized_vectype
5809
5810 Returns a vector type corresponding to SCALAR_TYPE of size
5811 VECTOR_TYPE if supported by the target. */
5812
5813tree
bb67d9c7 5814get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 5815{
bb67d9c7
RG
5816 return get_vectype_for_scalar_type_and_size
5817 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
5818}
5819
ebfd146a
IR
5820/* Function vect_is_simple_use.
5821
5822 Input:
a70d6342
IR
5823 LOOP_VINFO - the vect info of the loop that is being vectorized.
5824 BB_VINFO - the vect info of the basic block that is being vectorized.
5825 OPERAND - operand of a stmt in the loop or bb.
ebfd146a
IR
5826 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5827
5828 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 5829 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 5830 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 5831 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
5832 is the case in reduction/induction computations).
5833 For basic blocks, supportable operands are constants and bb invariants.
5834 For now, operands defined outside the basic block are not supported. */
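/* Illustrative classification (a sketch; the SSA names are hypothetical):
 for a loop statement a_1 = b_2 + c_3, the use b_2 yields
 vect_internal_def if its defining statement is inside the loop,
 vect_external_def if it is defined before the loop, and a constant
 operand such as 5 yields vect_constant_def. A minimal caller sketch:

 gimple def_stmt;
 tree def;
 enum vect_def_type dt;
 if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
 return false; */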
ebfd146a
IR
5835
5836bool
b8698a0f 5837vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
a70d6342 5838 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 5839 tree *def, enum vect_def_type *dt)
b8698a0f 5840{
ebfd146a
IR
5841 basic_block bb;
5842 stmt_vec_info stmt_vinfo;
a70d6342 5843 struct loop *loop = NULL;
b8698a0f 5844
a70d6342
IR
5845 if (loop_vinfo)
5846 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
5847
5848 *def_stmt = NULL;
5849 *def = NULL_TREE;
b8698a0f 5850
ebfd146a
IR
5851 if (vect_print_dump_info (REPORT_DETAILS))
5852 {
5853 fprintf (vect_dump, "vect_is_simple_use: operand ");
5854 print_generic_expr (vect_dump, operand, TDF_SLIM);
5855 }
b8698a0f 5856
ebfd146a
IR
5857 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5858 {
5859 *dt = vect_constant_def;
5860 return true;
5861 }
b8698a0f 5862
ebfd146a
IR
5863 if (is_gimple_min_invariant (operand))
5864 {
5865 *def = operand;
8644a673 5866 *dt = vect_external_def;
ebfd146a
IR
5867 return true;
5868 }
5869
5870 if (TREE_CODE (operand) == PAREN_EXPR)
5871 {
5872 if (vect_print_dump_info (REPORT_DETAILS))
5873 fprintf (vect_dump, "non-associatable copy.");
5874 operand = TREE_OPERAND (operand, 0);
5875 }
b8698a0f 5876
ebfd146a
IR
5877 if (TREE_CODE (operand) != SSA_NAME)
5878 {
5879 if (vect_print_dump_info (REPORT_DETAILS))
5880 fprintf (vect_dump, "not ssa-name.");
5881 return false;
5882 }
b8698a0f 5883
ebfd146a
IR
5884 *def_stmt = SSA_NAME_DEF_STMT (operand);
5885 if (*def_stmt == NULL)
5886 {
5887 if (vect_print_dump_info (REPORT_DETAILS))
5888 fprintf (vect_dump, "no def_stmt.");
5889 return false;
5890 }
5891
5892 if (vect_print_dump_info (REPORT_DETAILS))
5893 {
5894 fprintf (vect_dump, "def_stmt: ");
5895 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5896 }
5897
8644a673 5898 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
5899 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5900 if (gimple_nop_p (*def_stmt))
5901 {
5902 *def = operand;
8644a673 5903 *dt = vect_external_def;
ebfd146a
IR
5904 return true;
5905 }
5906
5907 bb = gimple_bb (*def_stmt);
a70d6342
IR
5908
5909 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5910 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 5911 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 5912 *dt = vect_external_def;
ebfd146a
IR
5913 else
5914 {
5915 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5916 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5917 }
5918
5919 if (*dt == vect_unknown_def_type)
5920 {
5921 if (vect_print_dump_info (REPORT_DETAILS))
5922 fprintf (vect_dump, "Unsupported pattern.");
5923 return false;
5924 }
5925
5926 if (vect_print_dump_info (REPORT_DETAILS))
5927 fprintf (vect_dump, "type of def: %d.",*dt);
5928
5929 switch (gimple_code (*def_stmt))
5930 {
5931 case GIMPLE_PHI:
5932 *def = gimple_phi_result (*def_stmt);
5933 break;
5934
5935 case GIMPLE_ASSIGN:
5936 *def = gimple_assign_lhs (*def_stmt);
5937 break;
5938
5939 case GIMPLE_CALL:
5940 *def = gimple_call_lhs (*def_stmt);
5941 if (*def != NULL)
5942 break;
5943 /* FALLTHRU */
5944 default:
5945 if (vect_print_dump_info (REPORT_DETAILS))
5946 fprintf (vect_dump, "unsupported defining stmt: ");
5947 return false;
5948 }
5949
5950 return true;
5951}
5952
b690cc0f
RG
5953/* Function vect_is_simple_use_1.
5954
 5955 Same as vect_is_simple_use but also determines the vector operand
5956 type of OPERAND and stores it to *VECTYPE. If the definition of
5957 OPERAND is vect_uninitialized_def, vect_constant_def or
5958 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
 5959 is responsible for computing the best suited vector type for the
5960 scalar operand. */
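/* For example (the V8HI type is purely illustrative): if OPERAND is
 defined by a loop statement whose stmt_vec_info already carries the
 vector type V8HI, *VECTYPE is set to that type; for a constant or
 external definition *VECTYPE is NULL_TREE and the caller chooses a
 suitable type itself. */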
5961
5962bool
5963vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5964 bb_vec_info bb_vinfo, gimple *def_stmt,
5965 tree *def, enum vect_def_type *dt, tree *vectype)
5966{
5967 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5968 return false;
5969
5970 /* Now get a vector type if the def is internal, otherwise supply
5971 NULL_TREE and leave it up to the caller to figure out a proper
5972 type for the use stmt. */
5973 if (*dt == vect_internal_def
5974 || *dt == vect_induction_def
5975 || *dt == vect_reduction_def
5976 || *dt == vect_double_reduction_def
5977 || *dt == vect_nested_cycle)
5978 {
5979 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
5980
5981 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5982 && !STMT_VINFO_RELEVANT (stmt_info)
5983 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 5984 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 5985
b690cc0f
RG
5986 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5987 gcc_assert (*vectype != NULL_TREE);
5988 }
5989 else if (*dt == vect_uninitialized_def
5990 || *dt == vect_constant_def
5991 || *dt == vect_external_def)
5992 *vectype = NULL_TREE;
5993 else
5994 gcc_unreachable ();
5995
5996 return true;
5997}
5998
ebfd146a
IR
5999
6000/* Function supportable_widening_operation
6001
b8698a0f
L
6002 Check whether an operation represented by the code CODE is a
6003 widening operation that is supported by the target platform in
b690cc0f
RG
6004 vector form (i.e., when operating on arguments of type VECTYPE_IN
6005 producing a result of type VECTYPE_OUT).
b8698a0f 6006
ebfd146a
IR
 6007 Widening operations we currently support are NOP (CONVERT), FLOAT,
 6008 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
6009 by the target platform either directly (via vector tree-codes), or via
6010 target builtins.
6011
6012 Output:
b8698a0f
L
6013 - CODE1 and CODE2 are codes of vector operations to be used when
6014 vectorizing the operation, if available.
ebfd146a 6015 - DECL1 and DECL2 are decls of target builtin functions to be used
ff802fa1 6016 when vectorizing the operation, if available. In this case,
b8698a0f 6017 CODE1 and CODE2 are CALL_EXPR.
ebfd146a
IR
6018 - MULTI_STEP_CVT determines the number of required intermediate steps in
6019 case of multi-step conversion (like char->short->int - in that case
6020 MULTI_STEP_CVT will be 1).
b8698a0f
L
6021 - INTERM_TYPES contains the intermediate type required to perform the
6022 widening operation (short in the above example). */
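/* Illustrative example (a sketch; actual support depends on the target):
 widening chars to ints, e.g. V16QI -> V4SI, goes through an
 intermediate short vector type (V8HI), so *MULTI_STEP_CVT == 1,
 *INTERM_TYPES holds the short vector type, and CODE1/CODE2 are
 VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (swapped on big-endian
 targets). */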
ebfd146a
IR
6023
6024bool
b690cc0f
RG
6025supportable_widening_operation (enum tree_code code, gimple stmt,
6026 tree vectype_out, tree vectype_in,
ebfd146a
IR
6027 tree *decl1, tree *decl2,
6028 enum tree_code *code1, enum tree_code *code2,
6029 int *multi_step_cvt,
6030 VEC (tree, heap) **interm_types)
6031{
6032 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6033 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 6034 struct loop *vect_loop = NULL;
ebfd146a
IR
6035 bool ordered_p;
6036 enum machine_mode vec_mode;
81f40b79 6037 enum insn_code icode1, icode2;
ebfd146a 6038 optab optab1, optab2;
b690cc0f
RG
6039 tree vectype = vectype_in;
6040 tree wide_vectype = vectype_out;
ebfd146a 6041 enum tree_code c1, c2;
4a00c761
JJ
6042 int i;
6043 tree prev_type, intermediate_type;
6044 enum machine_mode intermediate_mode, prev_mode;
6045 optab optab3, optab4;
ebfd146a 6046
4a00c761 6047 *multi_step_cvt = 0;
4ef69dfc
IR
6048 if (loop_info)
6049 vect_loop = LOOP_VINFO_LOOP (loop_info);
6050
ebfd146a 6051 /* The result of a vectorized widening operation usually requires two vectors
4a00c761 6052 (because the widened results do not fit into one vector). The generated
b8698a0f 6053 vector results would normally be expected to be generated in the same
ebfd146a
IR
6054 order as in the original scalar computation, i.e. if 8 results are
6055 generated in each vector iteration, they are to be organized as follows:
b8698a0f 6056 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
ebfd146a 6057
b8698a0f 6058 However, in the special case that the result of the widening operation is
ebfd146a 6059 used in a reduction computation only, the order doesn't matter (because
b8698a0f 6060 when vectorizing a reduction we change the order of the computation).
ebfd146a
IR
6061 Some targets can take advantage of this and generate more efficient code.
6062 For example, targets like Altivec, that support widen_mult using a sequence
6063 of {mult_even,mult_odd} generate the following vectors:
6064 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6065
6066 When vectorizing outer-loops, we execute the inner-loop sequentially
b8698a0f 6067 (each vectorized inner-loop iteration contributes to VF outer-loop
ff802fa1 6068 iterations in parallel). We therefore don't allow changing the order
ebfd146a
IR
6069 of the computation in the inner-loop during outer-loop vectorization. */
6070
4ef69dfc
IR
6071 if (vect_loop
6072 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
ebfd146a
IR
6073 && !nested_in_vect_loop_p (vect_loop, stmt))
6074 ordered_p = false;
6075 else
6076 ordered_p = true;
6077
6078 if (!ordered_p
6079 && code == WIDEN_MULT_EXPR
6080 && targetm.vectorize.builtin_mul_widen_even
6081 && targetm.vectorize.builtin_mul_widen_even (vectype)
6082 && targetm.vectorize.builtin_mul_widen_odd
6083 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6084 {
6085 if (vect_print_dump_info (REPORT_DETAILS))
6086 fprintf (vect_dump, "Unordered widening operation detected.");
6087
6088 *code1 = *code2 = CALL_EXPR;
6089 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6090 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6091 return true;
6092 }
6093
6094 switch (code)
6095 {
6096 case WIDEN_MULT_EXPR:
4a00c761
JJ
6097 c1 = VEC_WIDEN_MULT_LO_EXPR;
6098 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
6099 break;
6100
36ba4aae 6101 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
6102 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6103 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
6104 break;
6105
ebfd146a 6106 CASE_CONVERT:
4a00c761
JJ
6107 c1 = VEC_UNPACK_LO_EXPR;
6108 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
6109 break;
6110
6111 case FLOAT_EXPR:
4a00c761
JJ
6112 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6113 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
6114 break;
6115
6116 case FIX_TRUNC_EXPR:
6117 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6118 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6119 computing the operation. */
6120 return false;
6121
6122 default:
6123 gcc_unreachable ();
6124 }
6125
4a00c761
JJ
6126 if (BYTES_BIG_ENDIAN)
6127 {
6128 enum tree_code ctmp = c1;
6129 c1 = c2;
6130 c2 = ctmp;
6131 }
6132
ebfd146a
IR
6133 if (code == FIX_TRUNC_EXPR)
6134 {
6135 /* The signedness is determined from output operand. */
b690cc0f
RG
6136 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6137 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
6138 }
6139 else
6140 {
6141 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6142 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6143 }
6144
6145 if (!optab1 || !optab2)
6146 return false;
6147
6148 vec_mode = TYPE_MODE (vectype);
947131ba
RS
6149 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6150 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6151 return false;
6152
4a00c761
JJ
6153 *code1 = c1;
6154 *code2 = c2;
6155
6156 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6157 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6158 return true;
6159
b8698a0f 6160 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 6161 types. */
ebfd146a 6162
4a00c761
JJ
6163 prev_type = vectype;
6164 prev_mode = vec_mode;
b8698a0f 6165
4a00c761
JJ
6166 if (!CONVERT_EXPR_CODE_P (code))
6167 return false;
b8698a0f 6168
4a00c761
JJ
6169 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 6170 intermediate steps in the promotion sequence. We try
 6171 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6172 not. */
6173 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6174 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6175 {
6176 intermediate_mode = insn_data[icode1].operand[0].mode;
6177 intermediate_type
6178 = lang_hooks.types.type_for_mode (intermediate_mode,
6179 TYPE_UNSIGNED (prev_type));
6180 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6181 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6182
6183 if (!optab3 || !optab4
6184 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6185 || insn_data[icode1].operand[0].mode != intermediate_mode
6186 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6187 || insn_data[icode2].operand[0].mode != intermediate_mode
6188 || ((icode1 = optab_handler (optab3, intermediate_mode))
6189 == CODE_FOR_nothing)
6190 || ((icode2 = optab_handler (optab4, intermediate_mode))
6191 == CODE_FOR_nothing))
6192 break;
ebfd146a 6193
4a00c761
JJ
6194 VEC_quick_push (tree, *interm_types, intermediate_type);
6195 (*multi_step_cvt)++;
6196
6197 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6198 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6199 return true;
6200
6201 prev_type = intermediate_type;
6202 prev_mode = intermediate_mode;
ebfd146a
IR
6203 }
6204
4a00c761
JJ
6205 VEC_free (tree, heap, *interm_types);
6206 return false;
ebfd146a
IR
6207}
6208
6209
6210/* Function supportable_narrowing_operation
6211
b8698a0f
L
6212 Check whether an operation represented by the code CODE is a
6213 narrowing operation that is supported by the target platform in
b690cc0f
RG
6214 vector form (i.e., when operating on arguments of type VECTYPE_IN
6215 and producing a result of type VECTYPE_OUT).
b8698a0f 6216
ebfd146a 6217 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 6218 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
6219 the target platform directly via vector tree-codes.
6220
6221 Output:
b8698a0f
L
6222 - CODE1 is the code of a vector operation to be used when
6223 vectorizing the operation, if available.
ebfd146a
IR
6224 - MULTI_STEP_CVT determines the number of required intermediate steps in
6225 case of multi-step conversion (like int->short->char - in that case
6226 MULTI_STEP_CVT will be 1).
6227 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 6228 narrowing operation (short in the above example). */
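/* Illustrative example (a sketch; actual support depends on the target):
 narrowing ints to chars, e.g. V4SI -> V16QI, packs through an
 intermediate short vector type, so *CODE1 == VEC_PACK_TRUNC_EXPR,
 *MULTI_STEP_CVT == 1 and *INTERM_TYPES holds the short vector
 type. */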
ebfd146a
IR
6229
6230bool
6231supportable_narrowing_operation (enum tree_code code,
b690cc0f 6232 tree vectype_out, tree vectype_in,
ebfd146a
IR
6233 enum tree_code *code1, int *multi_step_cvt,
6234 VEC (tree, heap) **interm_types)
6235{
6236 enum machine_mode vec_mode;
6237 enum insn_code icode1;
6238 optab optab1, interm_optab;
b690cc0f
RG
6239 tree vectype = vectype_in;
6240 tree narrow_vectype = vectype_out;
ebfd146a 6241 enum tree_code c1;
4a00c761
JJ
6242 tree intermediate_type;
6243 enum machine_mode intermediate_mode, prev_mode;
ebfd146a 6244 int i;
4a00c761 6245 bool uns;
ebfd146a 6246
4a00c761 6247 *multi_step_cvt = 0;
ebfd146a
IR
6248 switch (code)
6249 {
6250 CASE_CONVERT:
6251 c1 = VEC_PACK_TRUNC_EXPR;
6252 break;
6253
6254 case FIX_TRUNC_EXPR:
6255 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6256 break;
6257
6258 case FLOAT_EXPR:
6259 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6260 tree code and optabs used for computing the operation. */
6261 return false;
6262
6263 default:
6264 gcc_unreachable ();
6265 }
6266
6267 if (code == FIX_TRUNC_EXPR)
6268 /* The signedness is determined from output operand. */
b690cc0f 6269 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
6270 else
6271 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6272
6273 if (!optab1)
6274 return false;
6275
6276 vec_mode = TYPE_MODE (vectype);
947131ba 6277 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6278 return false;
6279
4a00c761
JJ
6280 *code1 = c1;
6281
6282 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6283 return true;
6284
ebfd146a
IR
6285 /* Check if it's a multi-step conversion that can be done using intermediate
6286 types. */
4a00c761
JJ
6287 prev_mode = vec_mode;
6288 if (code == FIX_TRUNC_EXPR)
6289 uns = TYPE_UNSIGNED (vectype_out);
6290 else
6291 uns = TYPE_UNSIGNED (vectype);
6292
6293 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6294 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6295 costly than signed. */
6296 if (code == FIX_TRUNC_EXPR && uns)
6297 {
6298 enum insn_code icode2;
6299
6300 intermediate_type
6301 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6302 interm_optab
6303 = optab_for_tree_code (c1, intermediate_type, optab_default);
6304 if (interm_optab != NULL
6305 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6306 && insn_data[icode1].operand[0].mode
6307 == insn_data[icode2].operand[0].mode)
6308 {
6309 uns = false;
6310 optab1 = interm_optab;
6311 icode1 = icode2;
6312 }
6313 }
ebfd146a 6314
4a00c761
JJ
6315 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 6316 intermediate steps in the narrowing sequence. We try
6317 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6318 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6319 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6320 {
6321 intermediate_mode = insn_data[icode1].operand[0].mode;
6322 intermediate_type
6323 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6324 interm_optab
6325 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6326 optab_default);
6327 if (!interm_optab
6328 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6329 || insn_data[icode1].operand[0].mode != intermediate_mode
6330 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6331 == CODE_FOR_nothing))
6332 break;
6333
6334 VEC_quick_push (tree, *interm_types, intermediate_type);
6335 (*multi_step_cvt)++;
6336
6337 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6338 return true;
6339
6340 prev_mode = intermediate_mode;
6341 optab1 = interm_optab;
ebfd146a
IR
6342 }
6343
4a00c761
JJ
6344 VEC_free (tree, heap, *interm_types);
6345 return false;
ebfd146a 6346}