/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
78c60e3d 26#include "dumpfile.h"
ebfd146a
IR
27#include "tm.h"
28#include "ggc.h"
29#include "tree.h"
30#include "target.h"
31#include "basic-block.h"
cf835838 32#include "gimple-pretty-print.h"
ebfd146a 33#include "tree-flow.h"
ebfd146a 34#include "cfgloop.h"
ebfd146a 35#include "expr.h"
7ee2468b 36#include "recog.h" /* FIXME: for insn_data */
ebfd146a 37#include "optabs.h"
718f9c0f 38#include "diagnostic-core.h"
ebfd146a 39#include "tree-vectorizer.h"
7ee2468b 40#include "dumpfile.h"
ebfd146a 41
7ee2468b
SB
42/* For lang_hooks.types.type_for_mode. */
43#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
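
/* For example, vect_model_simple_cost below charges invariant operands to
   the prologue and the NCOPIES vector operations to the loop body with two
   such calls:

     record_stmt_cost (prologue_cost_vec, 1, vector_stmt, stmt_info, 0,
                       vect_prologue);
     record_stmt_cost (body_cost_vec, ncopies, vector_stmt, stmt_info, 0,
                       vect_body);

   Passing a NULL cost vector instead hands the cost straight to the target
   via add_stmt_cost, as in the else branch above.  */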

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside the pattern; if LHS also has uses that are
             pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
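
/* For example, for the store "a[i_1] = x_2" this returns true when USE is
   x_2 (the stored value) and false when USE is i_1, which only appears
   inside the ARRAY_REF and is therefore used for indexing only.  */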


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.");
                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
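
/* For instance, a single-step promotion (PWR == 0) charges vect_pow2 (1)
   vec_promote_demote operations, and a two-step promotion (PWR == 1)
   charges vect_pow2 (1) + vect_pow2 (2); with the usual power-of-two
   helper that is 2 and 2 + 4 == 6 respectively, matching the doubling per
   step described above.  */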

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
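
/* For instance, a grouped store with group_size == 4, ncopies == 1 and no
   store-lanes support is charged 1 * exact_log2 (4) * 4 == 8 vec_perm
   operations on top of the stores themselves.  */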


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
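
/* In the STMT_VINFO_STRIDE_LOAD_P case above, each vector is assembled from
   scalar loads: e.g. with a four-element vector type and ncopies == 2 the
   loop body is charged 2 * 4 == 8 scalar_load operations plus 2
   vec_construct operations.  */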


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
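
/* For example, vect_get_vec_def_for_operand below materializes a constant
   operand OP as a vector with

     vect_init_vector (stmt, op, vector_type, NULL);

   where the NULL iterator places the initialization on the loop preheader
   edge (or before the first statement of the block for basic-block SLP),
   as handled by vect_init_vector_1 above.  */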


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, " def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
1548
1549
1550/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1551 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a
IR
1552
1553static void
b8698a0f
L
1554vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1555 VEC(tree,heap) **vec_oprnds0,
ebfd146a
IR
1556 VEC(tree,heap) **vec_oprnds1)
1557{
1558 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1559
1560 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1561 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1562
1563 if (vec_oprnds1 && *vec_oprnds1)
1564 {
1565 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1566 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1567 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1568 }
1569}
1570
1571
d092494c
IR
1572/* Get vectorized definitions for OP0 and OP1.
1573 REDUC_INDEX is the index of reduction operand in case of reduction,
1574 and -1 otherwise. */
ebfd146a 1575
d092494c 1576void
ebfd146a 1577vect_get_vec_defs (tree op0, tree op1, gimple stmt,
d092494c
IR
1578 VEC (tree, heap) **vec_oprnds0,
1579 VEC (tree, heap) **vec_oprnds1,
1580 slp_tree slp_node, int reduc_index)
ebfd146a
IR
1581{
1582 if (slp_node)
d092494c
IR
1583 {
1584 int nops = (op1 == NULL_TREE) ? 1 : 2;
1585 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1586 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1587
1588 VEC_quick_push (tree, ops, op0);
1589 if (op1)
1590 VEC_quick_push (tree, ops, op1);
1591
1592 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1593
1594 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1595 if (op1)
1596 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1597
1598 VEC_free (tree, heap, ops);
1599 VEC_free (slp_void_p, heap, vec_defs);
1600 }
ebfd146a
IR
1601 else
1602 {
1603 tree vec_oprnd;
1604
b8698a0f
L
1605 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1606 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
ebfd146a
IR
1607 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1608
1609 if (op1)
1610 {
b8698a0f
L
1611 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1612 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
ebfd146a
IR
1613 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1614 }
1615 }
1616}
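/* A minimal usage sketch, mirroring the per-copy loops in the
   vectorizable_* routines below (e.g. vectorizable_assignment): the
   first iteration creates the initial vector defs, later iterations
   advance them to the defs of the next copy.

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                              slp_node, -1);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
         ... generate the j-th vector stmt from vec_oprnds0/vec_oprnds1 ...
       }
*/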
1617
1618
1619/* Function vect_finish_stmt_generation.
1620
1621 Insert a new stmt. */
1622
1623void
1624vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1625 gimple_stmt_iterator *gsi)
1626{
1627 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1628 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a
IR
1630
1631 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1632
54e8e2c3
RG
1633 if (!gsi_end_p (*gsi)
1634 && gimple_has_mem_ops (vec_stmt))
1635 {
1636 gimple at_stmt = gsi_stmt (*gsi);
1637 tree vuse = gimple_vuse (at_stmt);
1638 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1639 {
1640 tree vdef = gimple_vdef (at_stmt);
1641 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1642 /* If we have an SSA vuse and insert a store, update virtual
1643 SSA form to avoid triggering the renamer. Do so only
1644 if we can easily see all uses - which is what almost always
1645 happens with the way vectorized stmts are inserted. */
1646 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1647 && ((is_gimple_assign (vec_stmt)
1648 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1649 || (is_gimple_call (vec_stmt)
1650 && !(gimple_call_flags (vec_stmt)
1651 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1652 {
1653 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1654 gimple_set_vdef (vec_stmt, new_vdef);
1655 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1656 }
1657 }
1658 }
ebfd146a
IR
1659 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1660
b8698a0f 1661 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1662 bb_vinfo));
ebfd146a 1663
73fbfcad 1664 if (dump_enabled_p ())
ebfd146a 1665 {
78c60e3d
SS
1666 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1668 }
1669
ad885386 1670 gimple_set_location (vec_stmt, gimple_location (stmt));
ebfd146a
IR
1671}
1672
1673/* Checks if CALL can be vectorized in type VECTYPE. Returns
1674 a function declaration if the target has a vectorized version
1675 of the function, or NULL_TREE if the function cannot be vectorized. */
1676
1677tree
1678vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1679{
1680 tree fndecl = gimple_call_fndecl (call);
ebfd146a
IR
1681
1682 /* We only handle functions that do not read or clobber memory -- i.e.
1683 const or novops ones. */
1684 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1685 return NULL_TREE;
1686
1687 if (!fndecl
1688 || TREE_CODE (fndecl) != FUNCTION_DECL
1689 || !DECL_BUILT_IN (fndecl))
1690 return NULL_TREE;
1691
62f7fd21 1692 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
ebfd146a
IR
1693 vectype_in);
1694}
1695
1696/* Function vectorizable_call.
1697
b8698a0f
L
1698 Check if STMT performs a function call that can be vectorized.
1699 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1700 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1701 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1702
1703static bool
190c2236
JJ
1704vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1705 slp_tree slp_node)
ebfd146a
IR
1706{
1707 tree vec_dest;
1708 tree scalar_dest;
1709 tree op, type;
1710 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1712 tree vectype_out, vectype_in;
1713 int nunits_in;
1714 int nunits_out;
1715 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 1716 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 1717 tree fndecl, new_temp, def, rhs_type;
ebfd146a 1718 gimple def_stmt;
0502fb85
UB
1719 enum vect_def_type dt[3]
1720 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 1721 gimple new_stmt = NULL;
ebfd146a
IR
1722 int ncopies, j;
1723 VEC(tree, heap) *vargs = NULL;
1724 enum { NARROW, NONE, WIDEN } modifier;
1725 size_t i, nargs;
9d5e7640 1726 tree lhs;
ebfd146a 1727
190c2236 1728 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
1729 return false;
1730
8644a673 1731 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1732 return false;
1733
ebfd146a
IR
1734 /* Is STMT a vectorizable call? */
1735 if (!is_gimple_call (stmt))
1736 return false;
1737
1738 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1739 return false;
1740
822ba6d7 1741 if (stmt_can_throw_internal (stmt))
5a2c1986
IR
1742 return false;
1743
b690cc0f
RG
1744 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1745
ebfd146a
IR
1746 /* Process function arguments. */
1747 rhs_type = NULL_TREE;
b690cc0f 1748 vectype_in = NULL_TREE;
ebfd146a
IR
1749 nargs = gimple_call_num_args (stmt);
1750
1b1562a5
MM
1751 /* Bail out if the function has more than three arguments; we do not have
1752 interesting builtin functions to vectorize with more than two arguments
1753 except for fma. A call with no arguments is not interesting either. */
1754 if (nargs == 0 || nargs > 3)
ebfd146a
IR
1755 return false;
1756
1757 for (i = 0; i < nargs; i++)
1758 {
b690cc0f
RG
1759 tree opvectype;
1760
ebfd146a
IR
1761 op = gimple_call_arg (stmt, i);
1762
1763 /* We can only handle calls with arguments of the same type. */
1764 if (rhs_type
8533c9d8 1765 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 1766 {
73fbfcad 1767 if (dump_enabled_p ())
78c60e3d
SS
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "argument types differ.");
ebfd146a
IR
1770 return false;
1771 }
b690cc0f
RG
1772 if (!rhs_type)
1773 rhs_type = TREE_TYPE (op);
ebfd146a 1774
24ee1384 1775 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 1776 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 1777 {
73fbfcad 1778 if (dump_enabled_p ())
78c60e3d
SS
1779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1780 "use not simple.");
ebfd146a
IR
1781 return false;
1782 }
ebfd146a 1783
b690cc0f
RG
1784 if (!vectype_in)
1785 vectype_in = opvectype;
1786 else if (opvectype
1787 && opvectype != vectype_in)
1788 {
73fbfcad 1789 if (dump_enabled_p ())
78c60e3d
SS
1790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1791 "argument vector types differ.");
b690cc0f
RG
1792 return false;
1793 }
1794 }
1795 /* If all arguments are external or constant defs use a vector type with
1796 the same size as the output vector type. */
ebfd146a 1797 if (!vectype_in)
b690cc0f 1798 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
1799 if (vec_stmt)
1800 gcc_assert (vectype_in);
1801 if (!vectype_in)
1802 {
73fbfcad 1803 if (dump_enabled_p ())
7d8930a0 1804 {
78c60e3d
SS
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "no vectype for scalar type ");
1807 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
7d8930a0
IR
1808 }
1809
1810 return false;
1811 }
ebfd146a
IR
1812
1813 /* FORNOW */
b690cc0f
RG
1814 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1815 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
1816 if (nunits_in == nunits_out / 2)
1817 modifier = NARROW;
1818 else if (nunits_out == nunits_in)
1819 modifier = NONE;
1820 else if (nunits_out == nunits_in / 2)
1821 modifier = WIDEN;
1822 else
1823 return false;
1824
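/* Worked example (a hypothetical illustration, assuming 128-bit vectors):
   a call taking doubles and returning floats has vectype_in = V2DF
   (nunits_in == 2) and vectype_out = V4SF (nunits_out == 4), so
   nunits_in == nunits_out / 2 and the modifier is NARROW: each
   vectorized call consumes two input vectors per output vector.  The
   symmetric float -> double case would give WIDEN, which no target
   currently implements here (see the WIDEN case below).  */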
1825 /* For now, we only vectorize functions if a target specific builtin
1826 is available. TODO -- in some cases, it might be profitable to
1827 insert the calls for pieces of the vector, in order to be able
1828 to vectorize other operations in the loop. */
1829 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1830 if (fndecl == NULL_TREE)
1831 {
73fbfcad 1832 if (dump_enabled_p ())
78c60e3d
SS
1833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1834 "function is not vectorizable.");
ebfd146a
IR
1835
1836 return false;
1837 }
1838
5006671f 1839 gcc_assert (!gimple_vuse (stmt));
ebfd146a 1840
190c2236
JJ
1841 if (slp_node || PURE_SLP_STMT (stmt_info))
1842 ncopies = 1;
1843 else if (modifier == NARROW)
ebfd146a
IR
1844 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1845 else
1846 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1847
1848 /* Sanity check: make sure that at least one copy of the vectorized stmt
1849 needs to be generated. */
1850 gcc_assert (ncopies >= 1);
1851
1852 if (!vec_stmt) /* transformation not required. */
1853 {
1854 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 1855 if (dump_enabled_p ())
78c60e3d 1856 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
c3e7ee41 1857 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
1858 return true;
1859 }
1860
1861 /** Transform. **/
1862
73fbfcad 1863 if (dump_enabled_p ())
78c60e3d 1864 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
ebfd146a
IR
1865
1866 /* Handle def. */
1867 scalar_dest = gimple_call_lhs (stmt);
1868 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1869
1870 prev_stmt_info = NULL;
1871 switch (modifier)
1872 {
1873 case NONE:
1874 for (j = 0; j < ncopies; ++j)
1875 {
1876 /* Build argument list for the vectorized call. */
1877 if (j == 0)
1878 vargs = VEC_alloc (tree, heap, nargs);
1879 else
1880 VEC_truncate (tree, vargs, 0);
1881
190c2236
JJ
1882 if (slp_node)
1883 {
1884 VEC (slp_void_p, heap) *vec_defs
1885 = VEC_alloc (slp_void_p, heap, nargs);
1886 VEC (tree, heap) *vec_oprnds0;
1887
1888 for (i = 0; i < nargs; i++)
1889 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1890 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1891 vec_oprnds0
1892 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1893
1894 /* Arguments are ready. Create the new vector stmt. */
1895 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1896 {
1897 size_t k;
1898 for (k = 0; k < nargs; k++)
1899 {
1900 VEC (tree, heap) *vec_oprndsk
1901 = (VEC (tree, heap) *)
1902 VEC_index (slp_void_p, vec_defs, k);
1903 VEC_replace (tree, vargs, k,
1904 VEC_index (tree, vec_oprndsk, i));
1905 }
1906 new_stmt = gimple_build_call_vec (fndecl, vargs);
1907 new_temp = make_ssa_name (vec_dest, new_stmt);
1908 gimple_call_set_lhs (new_stmt, new_temp);
1909 vect_finish_stmt_generation (stmt, new_stmt, gsi);
190c2236
JJ
1910 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1911 new_stmt);
1912 }
1913
1914 for (i = 0; i < nargs; i++)
1915 {
1916 VEC (tree, heap) *vec_oprndsi
1917 = (VEC (tree, heap) *)
1918 VEC_index (slp_void_p, vec_defs, i);
1919 VEC_free (tree, heap, vec_oprndsi);
1920 }
1921 VEC_free (slp_void_p, heap, vec_defs);
1922 continue;
1923 }
1924
ebfd146a
IR
1925 for (i = 0; i < nargs; i++)
1926 {
1927 op = gimple_call_arg (stmt, i);
1928 if (j == 0)
1929 vec_oprnd0
1930 = vect_get_vec_def_for_operand (op, stmt, NULL);
1931 else
63827fb8
IR
1932 {
1933 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1934 vec_oprnd0
1935 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1936 }
ebfd146a
IR
1937
1938 VEC_quick_push (tree, vargs, vec_oprnd0);
1939 }
1940
1941 new_stmt = gimple_build_call_vec (fndecl, vargs);
1942 new_temp = make_ssa_name (vec_dest, new_stmt);
1943 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
1944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1945
1946 if (j == 0)
1947 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1948 else
1949 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1950
1951 prev_stmt_info = vinfo_for_stmt (new_stmt);
1952 }
1953
1954 break;
1955
1956 case NARROW:
1957 for (j = 0; j < ncopies; ++j)
1958 {
1959 /* Build argument list for the vectorized call. */
1960 if (j == 0)
1961 vargs = VEC_alloc (tree, heap, nargs * 2);
1962 else
1963 VEC_truncate (tree, vargs, 0);
1964
190c2236
JJ
1965 if (slp_node)
1966 {
1967 VEC (slp_void_p, heap) *vec_defs
1968 = VEC_alloc (slp_void_p, heap, nargs);
1969 VEC (tree, heap) *vec_oprnds0;
1970
1971 for (i = 0; i < nargs; i++)
1972 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1973 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1974 vec_oprnds0
1975 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1976
1977 /* Arguments are ready. Create the new vector stmt. */
1978 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1979 i += 2)
1980 {
1981 size_t k;
1982 VEC_truncate (tree, vargs, 0);
1983 for (k = 0; k < nargs; k++)
1984 {
1985 VEC (tree, heap) *vec_oprndsk
1986 = (VEC (tree, heap) *)
1987 VEC_index (slp_void_p, vec_defs, k);
1988 VEC_quick_push (tree, vargs,
1989 VEC_index (tree, vec_oprndsk, i));
1990 VEC_quick_push (tree, vargs,
1991 VEC_index (tree, vec_oprndsk, i + 1));
1992 }
1993 new_stmt = gimple_build_call_vec (fndecl, vargs);
1994 new_temp = make_ssa_name (vec_dest, new_stmt);
1995 gimple_call_set_lhs (new_stmt, new_temp);
1996 vect_finish_stmt_generation (stmt, new_stmt, gsi);
190c2236
JJ
1997 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1998 new_stmt);
1999 }
2000
2001 for (i = 0; i < nargs; i++)
2002 {
2003 VEC (tree, heap) *vec_oprndsi
2004 = (VEC (tree, heap) *)
2005 VEC_index (slp_void_p, vec_defs, i);
2006 VEC_free (tree, heap, vec_oprndsi);
2007 }
2008 VEC_free (slp_void_p, heap, vec_defs);
2009 continue;
2010 }
2011
ebfd146a
IR
2012 for (i = 0; i < nargs; i++)
2013 {
2014 op = gimple_call_arg (stmt, i);
2015 if (j == 0)
2016 {
2017 vec_oprnd0
2018 = vect_get_vec_def_for_operand (op, stmt, NULL);
2019 vec_oprnd1
63827fb8 2020 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2021 }
2022 else
2023 {
336ecb65 2024 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2025 vec_oprnd0
63827fb8 2026 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2027 vec_oprnd1
63827fb8 2028 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2029 }
2030
2031 VEC_quick_push (tree, vargs, vec_oprnd0);
2032 VEC_quick_push (tree, vargs, vec_oprnd1);
2033 }
2034
2035 new_stmt = gimple_build_call_vec (fndecl, vargs);
2036 new_temp = make_ssa_name (vec_dest, new_stmt);
2037 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
2038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2039
2040 if (j == 0)
2041 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2042 else
2043 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2044
2045 prev_stmt_info = vinfo_for_stmt (new_stmt);
2046 }
2047
2048 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2049
2050 break;
2051
2052 case WIDEN:
2053 /* No current target implements this case. */
2054 return false;
2055 }
2056
2057 VEC_free (tree, heap, vargs);
2058
2059 /* Update the exception handling table with the vector stmt if necessary. */
2060 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2061 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2062
2063 /* The call in STMT might prevent it from being removed in dce.
2064 We however cannot remove it here, due to the way the ssa name
2065 it defines is mapped to the new definition. So just replace
2066 the rhs of the statement with something harmless. */
2067
dd34c087
JJ
2068 if (slp_node)
2069 return true;
2070
ebfd146a 2071 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
2072 if (is_pattern_stmt_p (stmt_info))
2073 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2074 else
2075 lhs = gimple_call_lhs (stmt);
2076 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2077 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2078 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
2079 STMT_VINFO_STMT (stmt_info) = new_stmt;
2080 gsi_replace (gsi, new_stmt, false);
2081 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2082
2083 return true;
2084}
2085
2086
2087/* Function vect_gen_widened_results_half
2088
2089 Create a vector stmt whose code, number of arguments, and result
b8698a0f 2090 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 2091 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
2092 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2093 needs to be created (DECL is a function-decl of a target-builtin).
2094 STMT is the original scalar stmt that we are vectorizing. */
2095
2096static gimple
2097vect_gen_widened_results_half (enum tree_code code,
2098 tree decl,
2099 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2100 tree vec_dest, gimple_stmt_iterator *gsi,
2101 gimple stmt)
b8698a0f 2102{
ebfd146a 2103 gimple new_stmt;
b8698a0f
L
2104 tree new_temp;
2105
2106 /* Generate half of the widened result: */
2107 if (code == CALL_EXPR)
2108 {
2109 /* Target specific support */
ebfd146a
IR
2110 if (op_type == binary_op)
2111 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2112 else
2113 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2114 new_temp = make_ssa_name (vec_dest, new_stmt);
2115 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
2116 }
2117 else
ebfd146a 2118 {
b8698a0f
L
2119 /* Generic support */
2120 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
2121 if (op_type != binary_op)
2122 vec_oprnd1 = NULL;
2123 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2124 vec_oprnd1);
2125 new_temp = make_ssa_name (vec_dest, new_stmt);
2126 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 2127 }
ebfd146a
IR
2128 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2129
ebfd146a
IR
2130 return new_stmt;
2131}
2132
4a00c761
JJ
2133
2134/* Get vectorized definitions for loop-based vectorization. For the first
2135 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2136 the scalar operand), and for the rest we get a copy with
2137 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2138 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2139 The vectors are collected into VEC_OPRNDS. */
2140
2141static void
2142vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2143 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2144{
2145 tree vec_oprnd;
2146
2147 /* Get first vector operand. */
2148 /* All the vector operands except the very first one (that is scalar oprnd)
2149 are stmt copies. */
2150 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2151 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2152 else
2153 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2154
2155 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2156
2157 /* Get second vector operand. */
2158 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2159 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2160
2161 *oprnd = vec_oprnd;
2162
2163 /* For conversion in multiple steps, continue to get operands
2164 recursively. */
2165 if (multi_step_cvt)
2166 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2167}
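/* An illustrative trace (assuming the loop-based NARROW conversion case
   below): each invocation pushes two vector defs - the current one and
   its stmt copy - and then recurses, so a call with MULTI_STEP_CVT == N
   collects 2 * (N + 1) defs in VEC_OPRNDS.  vectorizable_conversion
   passes vect_pow2 (multi_step_cvt) - 1, i.e. exactly the
   2 * 2^multi_step_cvt input vectors consumed by one round of
   multi-step demotion.  */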
2168
2169
2170/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2171 For multi-step conversions store the resulting vectors and call the function
2172 recursively. */
2173
2174static void
2175vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2176 int multi_step_cvt, gimple stmt,
2177 VEC (tree, heap) *vec_dsts,
2178 gimple_stmt_iterator *gsi,
2179 slp_tree slp_node, enum tree_code code,
2180 stmt_vec_info *prev_stmt_info)
2181{
2182 unsigned int i;
2183 tree vop0, vop1, new_tmp, vec_dest;
2184 gimple new_stmt;
2185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2186
2187 vec_dest = VEC_pop (tree, vec_dsts);
2188
2189 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2190 {
2191 /* Create demotion operation. */
2192 vop0 = VEC_index (tree, *vec_oprnds, i);
2193 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2194 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2195 new_tmp = make_ssa_name (vec_dest, new_stmt);
2196 gimple_assign_set_lhs (new_stmt, new_tmp);
2197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2198
2199 if (multi_step_cvt)
2200 /* Store the resulting vector for next recursive call. */
2201 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2202 else
2203 {
2204 /* This is the last step of the conversion sequence. Store the
2205 vectors in SLP_NODE or in vector info of the scalar statement
2206 (or in STMT_VINFO_RELATED_STMT chain). */
2207 if (slp_node)
2208 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2209 else
2210 {
2211 if (!*prev_stmt_info)
2212 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2213 else
2214 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2215
2216 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2217 }
2218 }
2219 }
2220
2221 /* For multi-step demotion operations we first generate demotion operations
2222 from the source type to the intermediate types, and then combine the
2223 results (stored in VEC_OPRNDS) in demotion operation to the destination
2224 type. */
2225 if (multi_step_cvt)
2226 {
2227 /* At each level of recursion we have half of the operands we had at the
2228 previous level. */
2229 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2230 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2231 stmt, vec_dsts, gsi, slp_node,
2232 VEC_PACK_TRUNC_EXPR,
2233 prev_stmt_info);
2234 }
2235
2236 VEC_quick_push (tree, vec_dsts, vec_dest);
2237}
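/* An illustrative trace (a hypothetical example): with MULTI_STEP_CVT == 1
   and four operands {v0, v1, v2, v3} in VEC_OPRNDS, the first pass emits
   i0 = CODE (v0, v1) and i1 = CODE (v2, v3) into the intermediate-type
   destination popped from VEC_DSTS and stores them back at indices 0 and 1;
   the recursive call then emits r = VEC_PACK_TRUNC_EXPR (i0, i1) in the
   final destination type and records it as the vectorized stmt.  */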
2238
2239
2240/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2241 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2242 the resulting vectors and call the function recursively. */
2243
2244static void
2245vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2246 VEC (tree, heap) **vec_oprnds1,
2247 gimple stmt, tree vec_dest,
2248 gimple_stmt_iterator *gsi,
2249 enum tree_code code1,
2250 enum tree_code code2, tree decl1,
2251 tree decl2, int op_type)
2252{
2253 int i;
2254 tree vop0, vop1, new_tmp1, new_tmp2;
2255 gimple new_stmt1, new_stmt2;
2256 VEC (tree, heap) *vec_tmp = NULL;
2257
2258 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2259 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2260 {
2261 if (op_type == binary_op)
2262 vop1 = VEC_index (tree, *vec_oprnds1, i);
2263 else
2264 vop1 = NULL_TREE;
2265
2266 /* Generate the two halves of promotion operation. */
2267 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2268 op_type, vec_dest, gsi, stmt);
2269 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2270 op_type, vec_dest, gsi, stmt);
2271 if (is_gimple_call (new_stmt1))
2272 {
2273 new_tmp1 = gimple_call_lhs (new_stmt1);
2274 new_tmp2 = gimple_call_lhs (new_stmt2);
2275 }
2276 else
2277 {
2278 new_tmp1 = gimple_assign_lhs (new_stmt1);
2279 new_tmp2 = gimple_assign_lhs (new_stmt2);
2280 }
2281
2282 /* Store the results for the next step. */
2283 VEC_quick_push (tree, vec_tmp, new_tmp1);
2284 VEC_quick_push (tree, vec_tmp, new_tmp2);
2285 }
2286
2287 VEC_free (tree, heap, *vec_oprnds0);
2288 *vec_oprnds0 = vec_tmp;
2289}
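/* A short sketch of the data flow (assuming a single promotion level):
   for each input vector VOP0 (and VOP1 when OP_TYPE == binary_op) the two
   vect_gen_widened_results_half calls produce the two halves of the
   widened result, roughly lo = CODE1 (vop0, vop1) and
   hi = CODE2 (vop0, vop1); both are pushed to VEC_TMP, so every level of
   a multi-step promotion doubles the number of vector operands in
   *VEC_OPRNDS0.  */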
2290
2291
b8698a0f
L
2292/* Check if STMT performs a conversion operation, that can be vectorized.
2293 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 2294 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
2295 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2296
2297static bool
2298vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2299 gimple *vec_stmt, slp_tree slp_node)
2300{
2301 tree vec_dest;
2302 tree scalar_dest;
4a00c761 2303 tree op0, op1 = NULL_TREE;
ebfd146a
IR
2304 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 2308 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
2309 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2310 tree new_temp;
2311 tree def;
2312 gimple def_stmt;
2313 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2314 gimple new_stmt = NULL;
2315 stmt_vec_info prev_stmt_info;
2316 int nunits_in;
2317 int nunits_out;
2318 tree vectype_out, vectype_in;
4a00c761
JJ
2319 int ncopies, i, j;
2320 tree lhs_type, rhs_type;
ebfd146a 2321 enum { NARROW, NONE, WIDEN } modifier;
4a00c761 2322 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
ebfd146a 2323 tree vop0;
4a00c761
JJ
2324 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2325 int multi_step_cvt = 0;
2326 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2327 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2328 int op_type;
2329 enum machine_mode rhs_mode;
2330 unsigned short fltsz;
ebfd146a
IR
2331
2332 /* Is STMT a vectorizable conversion? */
2333
4a00c761 2334 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2335 return false;
2336
8644a673 2337 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2338 return false;
2339
2340 if (!is_gimple_assign (stmt))
2341 return false;
2342
2343 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2344 return false;
2345
2346 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
2347 if (!CONVERT_EXPR_CODE_P (code)
2348 && code != FIX_TRUNC_EXPR
2349 && code != FLOAT_EXPR
2350 && code != WIDEN_MULT_EXPR
2351 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
2352 return false;
2353
4a00c761
JJ
2354 op_type = TREE_CODE_LENGTH (code);
2355
ebfd146a 2356 /* Check types of lhs and rhs. */
b690cc0f 2357 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 2358 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
2359 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2360
ebfd146a
IR
2361 op0 = gimple_assign_rhs1 (stmt);
2362 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
2363
2364 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2365 && !((INTEGRAL_TYPE_P (lhs_type)
2366 && INTEGRAL_TYPE_P (rhs_type))
2367 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2368 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2369 return false;
2370
2371 if ((INTEGRAL_TYPE_P (lhs_type)
2372 && (TYPE_PRECISION (lhs_type)
2373 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2374 || (INTEGRAL_TYPE_P (rhs_type)
2375 && (TYPE_PRECISION (rhs_type)
2376 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2377 {
73fbfcad 2378 if (dump_enabled_p ())
78c60e3d
SS
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "type conversion to/from bit-precision unsupported.");
4a00c761
JJ
2381 return false;
2382 }
2383
b690cc0f 2384 /* Check the operands of the operation. */
24ee1384 2385 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f
RG
2386 &def_stmt, &def, &dt[0], &vectype_in))
2387 {
73fbfcad 2388 if (dump_enabled_p ())
78c60e3d
SS
2389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2390 "use not simple.");
b690cc0f
RG
2391 return false;
2392 }
4a00c761
JJ
2393 if (op_type == binary_op)
2394 {
2395 bool ok;
2396
2397 op1 = gimple_assign_rhs2 (stmt);
2398 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2399 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2400 OP1. */
2401 if (CONSTANT_CLASS_P (op0))
f5709183 2402 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
4a00c761
JJ
2403 &def_stmt, &def, &dt[1], &vectype_in);
2404 else
f5709183 2405 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 2406 &def, &dt[1]);
4a00c761
JJ
2407
2408 if (!ok)
2409 {
73fbfcad 2410 if (dump_enabled_p ())
78c60e3d
SS
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2412 "use not simple.");
4a00c761
JJ
2413 return false;
2414 }
2415 }
2416
b690cc0f
RG
2417 /* If op0 is an external or constant def, use a vector type of
2418 the same size as the output vector type. */
ebfd146a 2419 if (!vectype_in)
b690cc0f 2420 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2421 if (vec_stmt)
2422 gcc_assert (vectype_in);
2423 if (!vectype_in)
2424 {
73fbfcad 2425 if (dump_enabled_p ())
4a00c761 2426 {
78c60e3d
SS
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428 "no vectype for scalar type ");
2429 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4a00c761 2430 }
7d8930a0
IR
2431
2432 return false;
2433 }
ebfd146a 2434
b690cc0f
RG
2435 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2436 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 2437 if (nunits_in < nunits_out)
ebfd146a
IR
2438 modifier = NARROW;
2439 else if (nunits_out == nunits_in)
2440 modifier = NONE;
ebfd146a 2441 else
4a00c761 2442 modifier = WIDEN;
ebfd146a 2443
ff802fa1
IR
2444 /* Multiple types in SLP are handled by creating the appropriate number of
2445 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2446 case of SLP. */
437f4a00 2447 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 2448 ncopies = 1;
4a00c761
JJ
2449 else if (modifier == NARROW)
2450 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2451 else
2452 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 2453
ebfd146a
IR
2454 /* Sanity check: make sure that at least one copy of the vectorized stmt
2455 needs to be generated. */
2456 gcc_assert (ncopies >= 1);
2457
ebfd146a 2458 /* Supportable by target? */
4a00c761 2459 switch (modifier)
ebfd146a 2460 {
4a00c761
JJ
2461 case NONE:
2462 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2463 return false;
2464 if (supportable_convert_operation (code, vectype_out, vectype_in,
2465 &decl1, &code1))
2466 break;
2467 /* FALLTHRU */
2468 unsupported:
73fbfcad 2469 if (dump_enabled_p ())
78c60e3d
SS
2470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2471 "conversion not supported by target.");
ebfd146a 2472 return false;
ebfd146a 2473
4a00c761
JJ
2474 case WIDEN:
2475 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
2476 &code1, &code2, &multi_step_cvt,
2477 &interm_types))
4a00c761
JJ
2478 {
2479 /* Binary widening operation can only be supported directly by the
2480 architecture. */
2481 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2482 break;
2483 }
2484
2485 if (code != FLOAT_EXPR
2486 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2487 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2488 goto unsupported;
2489
2490 rhs_mode = TYPE_MODE (rhs_type);
2491 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2492 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2493 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2494 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2495 {
2496 cvt_type
2497 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2498 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2499 if (cvt_type == NULL_TREE)
2500 goto unsupported;
2501
2502 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2503 {
2504 if (!supportable_convert_operation (code, vectype_out,
2505 cvt_type, &decl1, &codecvt1))
2506 goto unsupported;
2507 }
2508 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
2509 cvt_type, &codecvt1,
2510 &codecvt2, &multi_step_cvt,
4a00c761
JJ
2511 &interm_types))
2512 continue;
2513 else
2514 gcc_assert (multi_step_cvt == 0);
2515
2516 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
2517 vectype_in, &code1, &code2,
2518 &multi_step_cvt, &interm_types))
4a00c761
JJ
2519 break;
2520 }
2521
2522 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2523 goto unsupported;
2524
2525 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2526 codecvt2 = ERROR_MARK;
2527 else
2528 {
2529 multi_step_cvt++;
2530 VEC_safe_push (tree, heap, interm_types, cvt_type);
2531 cvt_type = NULL_TREE;
2532 }
2533 break;
2534
2535 case NARROW:
2536 gcc_assert (op_type == unary_op);
2537 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2538 &code1, &multi_step_cvt,
2539 &interm_types))
2540 break;
2541
2542 if (code != FIX_TRUNC_EXPR
2543 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2544 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2545 goto unsupported;
2546
2547 rhs_mode = TYPE_MODE (rhs_type);
2548 cvt_type
2549 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2550 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2551 if (cvt_type == NULL_TREE)
2552 goto unsupported;
2553 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2554 &decl1, &codecvt1))
2555 goto unsupported;
2556 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2557 &code1, &multi_step_cvt,
2558 &interm_types))
2559 break;
2560 goto unsupported;
2561
2562 default:
2563 gcc_unreachable ();
ebfd146a
IR
2564 }
2565
2566 if (!vec_stmt) /* transformation not required. */
2567 {
73fbfcad 2568 if (dump_enabled_p ())
78c60e3d
SS
2569 dump_printf_loc (MSG_NOTE, vect_location,
2570 "=== vectorizable_conversion ===");
4a00c761 2571 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
2572 {
2573 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
c3e7ee41 2574 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
8bd37302 2575 }
4a00c761
JJ
2576 else if (modifier == NARROW)
2577 {
2578 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 2579 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
2580 }
2581 else
2582 {
2583 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 2584 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
2585 }
2586 VEC_free (tree, heap, interm_types);
ebfd146a
IR
2587 return true;
2588 }
2589
2590 /** Transform. **/
73fbfcad 2591 if (dump_enabled_p ())
78c60e3d
SS
2592 dump_printf_loc (MSG_NOTE, vect_location,
2593 "transform conversion. ncopies = %d.", ncopies);
ebfd146a 2594
4a00c761
JJ
2595 if (op_type == binary_op)
2596 {
2597 if (CONSTANT_CLASS_P (op0))
2598 op0 = fold_convert (TREE_TYPE (op1), op0);
2599 else if (CONSTANT_CLASS_P (op1))
2600 op1 = fold_convert (TREE_TYPE (op0), op1);
2601 }
2602
2603 /* In case of multi-step conversion, we first generate conversion operations
2604 to the intermediate types, and then from those types to the final one.
2605 We create vector destinations for the intermediate type (TYPES) received
2606 from supportable_*_operation, and store them in the correct order
2607 for future use in vect_create_vectorized_*_stmts (). */
2608 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
82294ec1
JJ
2609 vec_dest = vect_create_destination_var (scalar_dest,
2610 (cvt_type && modifier == WIDEN)
2611 ? cvt_type : vectype_out);
4a00c761
JJ
2612 VEC_quick_push (tree, vec_dsts, vec_dest);
2613
2614 if (multi_step_cvt)
2615 {
2616 for (i = VEC_length (tree, interm_types) - 1;
2617 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2618 {
2619 vec_dest = vect_create_destination_var (scalar_dest,
2620 intermediate_type);
2621 VEC_quick_push (tree, vec_dsts, vec_dest);
2622 }
2623 }
ebfd146a 2624
4a00c761 2625 if (cvt_type)
82294ec1
JJ
2626 vec_dest = vect_create_destination_var (scalar_dest,
2627 modifier == WIDEN
2628 ? vectype_out : cvt_type);
4a00c761
JJ
2629
2630 if (!slp_node)
2631 {
2632 if (modifier == NONE)
2633 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2634 else if (modifier == WIDEN)
2635 {
2636 vec_oprnds0 = VEC_alloc (tree, heap,
2637 (multi_step_cvt
2638 ? vect_pow2 (multi_step_cvt) : 1));
2639 if (op_type == binary_op)
2640 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2641 }
2642 else
2643 vec_oprnds0 = VEC_alloc (tree, heap,
2644 2 * (multi_step_cvt
2645 ? vect_pow2 (multi_step_cvt) : 1));
2646 }
2647 else if (code == WIDEN_LSHIFT_EXPR)
2648 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
ebfd146a 2649
4a00c761 2650 last_oprnd = op0;
ebfd146a
IR
2651 prev_stmt_info = NULL;
2652 switch (modifier)
2653 {
2654 case NONE:
2655 for (j = 0; j < ncopies; j++)
2656 {
ebfd146a 2657 if (j == 0)
d092494c
IR
2658 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2659 -1);
ebfd146a
IR
2660 else
2661 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2662
ac47786e 2663 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
4a00c761
JJ
2664 {
2665 /* Arguments are ready. Create the new vector stmt. */
2666 if (code1 == CALL_EXPR)
2667 {
2668 new_stmt = gimple_build_call (decl1, 1, vop0);
2669 new_temp = make_ssa_name (vec_dest, new_stmt);
2670 gimple_call_set_lhs (new_stmt, new_temp);
2671 }
2672 else
2673 {
2674 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2675 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2676 vop0, NULL);
2677 new_temp = make_ssa_name (vec_dest, new_stmt);
2678 gimple_assign_set_lhs (new_stmt, new_temp);
2679 }
2680
2681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2682 if (slp_node)
2683 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2684 new_stmt);
2685 }
2686
ebfd146a
IR
2687 if (j == 0)
2688 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2689 else
2690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2691 prev_stmt_info = vinfo_for_stmt (new_stmt);
2692 }
2693 break;
2694
2695 case WIDEN:
2696 /* In case the vectorization factor (VF) is bigger than the number
2697 of elements that we can fit in a vectype (nunits), we have to
2698 generate more than one vector stmt - i.e., we need to "unroll"
2699 the vector stmt by a factor VF/nunits. */
2700 for (j = 0; j < ncopies; j++)
2701 {
4a00c761 2702 /* Handle uses. */
ebfd146a 2703 if (j == 0)
4a00c761
JJ
2704 {
2705 if (slp_node)
2706 {
2707 if (code == WIDEN_LSHIFT_EXPR)
2708 {
2709 unsigned int k;
ebfd146a 2710
4a00c761
JJ
2711 vec_oprnd1 = op1;
2712 /* Store vec_oprnd1 for every vector stmt to be created
2713 for SLP_NODE. We check during the analysis that all
2714 the shift arguments are the same. */
2715 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2716 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2717
2718 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2719 slp_node, -1);
2720 }
2721 else
2722 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2723 &vec_oprnds1, slp_node, -1);
2724 }
2725 else
2726 {
2727 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2728 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2729 if (op_type == binary_op)
2730 {
2731 if (code == WIDEN_LSHIFT_EXPR)
2732 vec_oprnd1 = op1;
2733 else
2734 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2735 NULL);
2736 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2737 }
2738 }
2739 }
ebfd146a 2740 else
4a00c761
JJ
2741 {
2742 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2743 VEC_truncate (tree, vec_oprnds0, 0);
2744 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2745 if (op_type == binary_op)
2746 {
2747 if (code == WIDEN_LSHIFT_EXPR)
2748 vec_oprnd1 = op1;
2749 else
2750 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2751 vec_oprnd1);
2752 VEC_truncate (tree, vec_oprnds1, 0);
2753 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2754 }
2755 }
ebfd146a 2756
4a00c761
JJ
2757 /* Arguments are ready. Create the new vector stmts. */
2758 for (i = multi_step_cvt; i >= 0; i--)
2759 {
2760 tree this_dest = VEC_index (tree, vec_dsts, i);
2761 enum tree_code c1 = code1, c2 = code2;
2762 if (i == 0 && codecvt2 != ERROR_MARK)
2763 {
2764 c1 = codecvt1;
2765 c2 = codecvt2;
2766 }
2767 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2768 &vec_oprnds1,
2769 stmt, this_dest, gsi,
2770 c1, c2, decl1, decl2,
2771 op_type);
2772 }
2773
2774 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2775 {
2776 if (cvt_type)
2777 {
2778 if (codecvt1 == CALL_EXPR)
2779 {
2780 new_stmt = gimple_build_call (decl1, 1, vop0);
2781 new_temp = make_ssa_name (vec_dest, new_stmt);
2782 gimple_call_set_lhs (new_stmt, new_temp);
2783 }
2784 else
2785 {
2786 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2787 new_temp = make_ssa_name (vec_dest, NULL);
2788 new_stmt = gimple_build_assign_with_ops (codecvt1,
2789 new_temp,
2790 vop0, NULL);
2791 }
2792
2793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2794 }
2795 else
2796 new_stmt = SSA_NAME_DEF_STMT (vop0);
2797
2798 if (slp_node)
2799 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2800 new_stmt);
2801 else
2802 {
2803 if (!prev_stmt_info)
2804 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2805 else
2806 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2807 prev_stmt_info = vinfo_for_stmt (new_stmt);
2808 }
2809 }
ebfd146a 2810 }
4a00c761
JJ
2811
2812 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
2813 break;
2814
2815 case NARROW:
2816 /* In case the vectorization factor (VF) is bigger than the number
2817 of elements that we can fit in a vectype (nunits), we have to
2818 generate more than one vector stmt - i.e., we need to "unroll"
2819 the vector stmt by a factor VF/nunits. */
2820 for (j = 0; j < ncopies; j++)
2821 {
2822 /* Handle uses. */
4a00c761
JJ
2823 if (slp_node)
2824 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2825 slp_node, -1);
ebfd146a
IR
2826 else
2827 {
4a00c761
JJ
2828 VEC_truncate (tree, vec_oprnds0, 0);
2829 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2830 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
2831 }
2832
4a00c761
JJ
2833 /* Arguments are ready. Create the new vector stmts. */
2834 if (cvt_type)
2835 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2836 {
2837 if (codecvt1 == CALL_EXPR)
2838 {
2839 new_stmt = gimple_build_call (decl1, 1, vop0);
2840 new_temp = make_ssa_name (vec_dest, new_stmt);
2841 gimple_call_set_lhs (new_stmt, new_temp);
2842 }
2843 else
2844 {
2845 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2846 new_temp = make_ssa_name (vec_dest, NULL);
2847 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2848 vop0, NULL);
2849 }
ebfd146a 2850
4a00c761
JJ
2851 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2852 VEC_replace (tree, vec_oprnds0, i, new_temp);
2853 }
ebfd146a 2854
4a00c761
JJ
2855 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2856 stmt, vec_dsts, gsi,
2857 slp_node, code1,
2858 &prev_stmt_info);
ebfd146a
IR
2859 }
2860
2861 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 2862 break;
ebfd146a
IR
2863 }
2864
4a00c761
JJ
2865 VEC_free (tree, heap, vec_oprnds0);
2866 VEC_free (tree, heap, vec_oprnds1);
2867 VEC_free (tree, heap, vec_dsts);
2868 VEC_free (tree, heap, interm_types);
ebfd146a
IR
2869
2870 return true;
2871}
ff802fa1
IR
2872
2873
ebfd146a
IR
2874/* Function vectorizable_assignment.
2875
b8698a0f
L
2876 Check if STMT performs an assignment (copy) that can be vectorized.
2877 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2878 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2879 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2880
2881static bool
2882vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2883 gimple *vec_stmt, slp_tree slp_node)
2884{
2885 tree vec_dest;
2886 tree scalar_dest;
2887 tree op;
2888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2889 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2891 tree new_temp;
2892 tree def;
2893 gimple def_stmt;
2894 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 2895 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 2896 int ncopies;
f18b55bd 2897 int i, j;
ebfd146a
IR
2898 VEC(tree,heap) *vec_oprnds = NULL;
2899 tree vop;
a70d6342 2900 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
2901 gimple new_stmt = NULL;
2902 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
2903 enum tree_code code;
2904 tree vectype_in;
ebfd146a
IR
2905
2906 /* Multiple types in SLP are handled by creating the appropriate number of
2907 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2908 case of SLP. */
437f4a00 2909 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
2910 ncopies = 1;
2911 else
2912 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2913
2914 gcc_assert (ncopies >= 1);
ebfd146a 2915
a70d6342 2916 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2917 return false;
2918
8644a673 2919 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2920 return false;
2921
2922 /* Is vectorizable assignment? */
2923 if (!is_gimple_assign (stmt))
2924 return false;
2925
2926 scalar_dest = gimple_assign_lhs (stmt);
2927 if (TREE_CODE (scalar_dest) != SSA_NAME)
2928 return false;
2929
fde9c428 2930 code = gimple_assign_rhs_code (stmt);
ebfd146a 2931 if (gimple_assign_single_p (stmt)
fde9c428
RG
2932 || code == PAREN_EXPR
2933 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
2934 op = gimple_assign_rhs1 (stmt);
2935 else
2936 return false;
2937
7b7ec6c5
RG
2938 if (code == VIEW_CONVERT_EXPR)
2939 op = TREE_OPERAND (op, 0);
2940
24ee1384 2941 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
fde9c428 2942 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a 2943 {
73fbfcad 2944 if (dump_enabled_p ())
78c60e3d
SS
2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2946 "use not simple.");
ebfd146a
IR
2947 return false;
2948 }
2949
fde9c428
RG
2950 /* We can handle NOP_EXPR conversions that do not change the number
2951 of elements or the vector size. */
7b7ec6c5
RG
2952 if ((CONVERT_EXPR_CODE_P (code)
2953 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
2954 && (!vectype_in
2955 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2956 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2957 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2958 return false;
2959
7b7b1813
RG
2960 /* We do not handle bit-precision changes. */
2961 if ((CONVERT_EXPR_CODE_P (code)
2962 || code == VIEW_CONVERT_EXPR)
2963 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2964 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2965 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2966 || ((TYPE_PRECISION (TREE_TYPE (op))
2967 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2968 /* But a conversion that does not change the bit-pattern is ok. */
2969 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2970 > TYPE_PRECISION (TREE_TYPE (op)))
2971 && TYPE_UNSIGNED (TREE_TYPE (op))))
2972 {
73fbfcad 2973 if (dump_enabled_p ())
78c60e3d
SS
2974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2975 "type conversion to/from bit-precision "
2976 "unsupported.");
7b7b1813
RG
2977 return false;
2978 }
2979
ebfd146a
IR
2980 if (!vec_stmt) /* transformation not required. */
2981 {
2982 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 2983 if (dump_enabled_p ())
78c60e3d
SS
2984 dump_printf_loc (MSG_NOTE, vect_location,
2985 "=== vectorizable_assignment ===");
c3e7ee41 2986 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
2987 return true;
2988 }
2989
2990 /** Transform. **/
73fbfcad 2991 if (dump_enabled_p ())
78c60e3d 2992 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
ebfd146a
IR
2993
2994 /* Handle def. */
2995 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2996
2997 /* Handle use. */
f18b55bd 2998 for (j = 0; j < ncopies; j++)
ebfd146a 2999 {
f18b55bd
IR
3000 /* Handle uses. */
3001 if (j == 0)
d092494c 3002 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
f18b55bd
IR
3003 else
3004 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3005
3006 /* Arguments are ready. Create the new vector stmt. */
ac47786e 3007 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
f18b55bd 3008 {
7b7ec6c5
RG
3009 if (CONVERT_EXPR_CODE_P (code)
3010 || code == VIEW_CONVERT_EXPR)
4a73490d 3011 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
3012 new_stmt = gimple_build_assign (vec_dest, vop);
3013 new_temp = make_ssa_name (vec_dest, new_stmt);
3014 gimple_assign_set_lhs (new_stmt, new_temp);
3015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3016 if (slp_node)
3017 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3018 }
ebfd146a
IR
3019
3020 if (slp_node)
f18b55bd
IR
3021 continue;
3022
3023 if (j == 0)
3024 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3025 else
3026 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3027
3028 prev_stmt_info = vinfo_for_stmt (new_stmt);
3029 }
b8698a0f
L
3030
3031 VEC_free (tree, heap, vec_oprnds);
ebfd146a
IR
3032 return true;
3033}
3034
9dc3f7de 3035
1107f3ae
IR
3036/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3037 either as shift by a scalar or by a vector. */
3038
3039bool
3040vect_supportable_shift (enum tree_code code, tree scalar_type)
3041{
3042
3043 enum machine_mode vec_mode;
3044 optab optab;
3045 int icode;
3046 tree vectype;
3047
3048 vectype = get_vectype_for_scalar_type (scalar_type);
3049 if (!vectype)
3050 return false;
3051
3052 optab = optab_for_tree_code (code, vectype, optab_scalar);
3053 if (!optab
3054 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3055 {
3056 optab = optab_for_tree_code (code, vectype, optab_vector);
3057 if (!optab
3058 || (optab_handler (optab, TYPE_MODE (vectype))
3059 == CODE_FOR_nothing))
3060 return false;
3061 }
3062
3063 vec_mode = TYPE_MODE (vectype);
3064 icode = (int) optab_handler (optab, vec_mode);
3065 if (icode == CODE_FOR_nothing)
3066 return false;
3067
3068 return true;
3069}
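/* A minimal usage sketch (a hypothetical caller):

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... generate an RSHIFT_EXPR based pattern stmt ...
*/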
3070
3071
9dc3f7de
IR
3072/* Function vectorizable_shift.
3073
3074 Check if STMT performs a shift operation that can be vectorized.
3075 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3076 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3077 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3078
3079static bool
3080vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3081 gimple *vec_stmt, slp_tree slp_node)
3082{
3083 tree vec_dest;
3084 tree scalar_dest;
3085 tree op0, op1 = NULL;
3086 tree vec_oprnd1 = NULL_TREE;
3087 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3088 tree vectype;
3089 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3090 enum tree_code code;
3091 enum machine_mode vec_mode;
3092 tree new_temp;
3093 optab optab;
3094 int icode;
3095 enum machine_mode optab_op2_mode;
3096 tree def;
3097 gimple def_stmt;
3098 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3099 gimple new_stmt = NULL;
3100 stmt_vec_info prev_stmt_info;
3101 int nunits_in;
3102 int nunits_out;
3103 tree vectype_out;
cede2577 3104 tree op1_vectype;
9dc3f7de
IR
3105 int ncopies;
3106 int j, i;
3107 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3108 tree vop0, vop1;
3109 unsigned int k;
49eab32e 3110 bool scalar_shift_arg = true;
9dc3f7de
IR
3111 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3112 int vf;
3113
3114 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3115 return false;
3116
3117 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3118 return false;
3119
3120 /* Is STMT a vectorizable binary/unary operation? */
3121 if (!is_gimple_assign (stmt))
3122 return false;
3123
3124 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3125 return false;
3126
3127 code = gimple_assign_rhs_code (stmt);
3128
3129 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3130 || code == RROTATE_EXPR))
3131 return false;
3132
3133 scalar_dest = gimple_assign_lhs (stmt);
3134 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
3135 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3136 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3137 {
73fbfcad 3138 if (dump_enabled_p ())
78c60e3d
SS
3139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3140 "bit-precision shifts not supported.");
7b7b1813
RG
3141 return false;
3142 }
9dc3f7de
IR
3143
3144 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3145 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
3146 &def_stmt, &def, &dt[0], &vectype))
3147 {
73fbfcad 3148 if (dump_enabled_p ())
78c60e3d
SS
3149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3150 "use not simple.");
9dc3f7de
IR
3151 return false;
3152 }
3153 /* If op0 is an external or constant def use a vector type with
3154 the same size as the output vector type. */
3155 if (!vectype)
3156 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3157 if (vec_stmt)
3158 gcc_assert (vectype);
3159 if (!vectype)
3160 {
73fbfcad 3161 if (dump_enabled_p ())
78c60e3d
SS
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163 "no vectype for scalar type ");
9dc3f7de
IR
3164 return false;
3165 }
3166
3167 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3168 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3169 if (nunits_out != nunits_in)
3170 return false;
3171
3172 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3173 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3174 &def, &dt[1], &op1_vectype))
9dc3f7de 3175 {
73fbfcad 3176 if (dump_enabled_p ())
78c60e3d
SS
3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3178 "use not simple.");
9dc3f7de
IR
3179 return false;
3180 }
3181
3182 if (loop_vinfo)
3183 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3184 else
3185 vf = 1;
3186
3187 /* Multiple types in SLP are handled by creating the appropriate number of
3188 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3189 case of SLP. */
437f4a00 3190 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
3191 ncopies = 1;
3192 else
3193 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3194
3195 gcc_assert (ncopies >= 1);
3196
3197 /* Determine whether the shift amount is a vector, or scalar. If the
3198 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3199
49eab32e
JJ
3200 if (dt[1] == vect_internal_def && !slp_node)
3201 scalar_shift_arg = false;
3202 else if (dt[1] == vect_constant_def
3203 || dt[1] == vect_external_def
3204 || dt[1] == vect_internal_def)
3205 {
3206 /* In SLP, need to check whether the shift count is the same,
3207 in loops if it is a constant or invariant, it is always
3208 a scalar shift. */
3209 if (slp_node)
3210 {
3211 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3212 gimple slpstmt;
3213
3214 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3215 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3216 scalar_shift_arg = false;
3217 }
3218 }
3219 else
3220 {
73fbfcad 3221 if (dump_enabled_p ())
78c60e3d
SS
3222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223 "operand mode requires invariant argument.");
49eab32e
JJ
3224 return false;
3225 }
3226
9dc3f7de 3227 /* Vector shifted by vector. */
49eab32e 3228 if (!scalar_shift_arg)
9dc3f7de
IR
3229 {
3230 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 3231 if (dump_enabled_p ())
78c60e3d
SS
3232 dump_printf_loc (MSG_NOTE, vect_location,
3233 "vector/vector shift/rotate found.");
3234
aa948027
JJ
3235 if (!op1_vectype)
3236 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3237 if (op1_vectype == NULL_TREE
3238 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 3239 {
73fbfcad 3240 if (dump_enabled_p ())
78c60e3d
SS
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "unusable type for last operand in"
3243 " vector/vector shift/rotate.");
cede2577
JJ
3244 return false;
3245 }
9dc3f7de
IR
3246 }
3247 /* See if the machine has a vector shifted by scalar insn and if not
3248 then see if it has a vector shifted by vector insn. */
49eab32e 3249 else
9dc3f7de
IR
3250 {
3251 optab = optab_for_tree_code (code, vectype, optab_scalar);
3252 if (optab
3253 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3254 {
73fbfcad 3255 if (dump_enabled_p ())
78c60e3d
SS
3256 dump_printf_loc (MSG_NOTE, vect_location,
3257 "vector/scalar shift/rotate found.");
9dc3f7de
IR
3258 }
3259 else
3260 {
3261 optab = optab_for_tree_code (code, vectype, optab_vector);
3262 if (optab
3263 && (optab_handler (optab, TYPE_MODE (vectype))
3264 != CODE_FOR_nothing))
3265 {
49eab32e
JJ
3266 scalar_shift_arg = false;
3267
73fbfcad 3268 if (dump_enabled_p ())
78c60e3d
SS
3269 dump_printf_loc (MSG_NOTE, vect_location,
3270 "vector/vector shift/rotate found.");
9dc3f7de
IR
3271
3272 /* Unlike the other binary operators, shifts/rotates have
3273 an int rhs instead of one with the same type as the lhs,
3274 so make sure the scalar has the right type if we are
aa948027 3275 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
3276 if (dt[1] == vect_constant_def)
3277 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
3278 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3279 TREE_TYPE (op1)))
3280 {
3281 if (slp_node
3282 && TYPE_MODE (TREE_TYPE (vectype))
3283 != TYPE_MODE (TREE_TYPE (op1)))
3284 {
73fbfcad 3285 if (dump_enabled_p ())
78c60e3d
SS
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "unusable type for last operand in"
3288 " vector/vector shift/rotate.");
aa948027
JJ
3289 return false;
3290 }
3291 if (vec_stmt && !slp_node)
3292 {
3293 op1 = fold_convert (TREE_TYPE (vectype), op1);
3294 op1 = vect_init_vector (stmt, op1,
3295 TREE_TYPE (vectype), NULL);
3296 }
3297 }
9dc3f7de
IR
3298 }
3299 }
3300 }
9dc3f7de
IR
3301
3302 /* Supportable by target? */
3303 if (!optab)
3304 {
73fbfcad 3305 if (dump_enabled_p ())
78c60e3d
SS
3306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3307 "no optab.");
9dc3f7de
IR
3308 return false;
3309 }
3310 vec_mode = TYPE_MODE (vectype);
3311 icode = (int) optab_handler (optab, vec_mode);
3312 if (icode == CODE_FOR_nothing)
3313 {
73fbfcad 3314 if (dump_enabled_p ())
78c60e3d
SS
3315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3316 "op not supported by target.");
9dc3f7de
IR
3317 /* Check only during analysis. */
3318 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3319 || (vf < vect_min_worthwhile_factor (code)
3320 && !vec_stmt))
3321 return false;
73fbfcad 3322 if (dump_enabled_p ())
78c60e3d 3323 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
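 /* Editorial note (an interpretation, not from the original sources):
    proceeding "using word mode" means vectorization continues although
    the target lacks an instruction for this operation in the vector mode;
    the operation is expected to be open-coded later (e.g. by generic
    vector lowering), which is only considered when the vector is exactly
    one machine word wide, as the GET_MODE_SIZE check above requires.  */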
9dc3f7de
IR
3324 }
3325
3326 /* Worthwhile without SIMD support? Check only during analysis. */
3327 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3328 && vf < vect_min_worthwhile_factor (code)
3329 && !vec_stmt)
3330 {
73fbfcad 3331 if (dump_enabled_p ())
78c60e3d
SS
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "not worthwhile without SIMD support.");
9dc3f7de
IR
3334 return false;
3335 }
3336
3337 if (!vec_stmt) /* transformation not required. */
3338 {
3339 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 3340 if (dump_enabled_p ())
78c60e3d 3341 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
c3e7ee41 3342 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
9dc3f7de
IR
3343 return true;
3344 }
3345
3346 /** Transform. **/
3347
73fbfcad 3348 if (dump_enabled_p ())
78c60e3d
SS
3349 dump_printf_loc (MSG_NOTE, vect_location,
3350 "transform binary/unary operation.");
9dc3f7de
IR
3351
3352 /* Handle def. */
3353 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3354
3355 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3356 created in the previous stages of the recursion, so no allocation is
3357 needed, except for the case of shift with scalar shift argument. In that
3358 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3359 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3360 In case of loop-based vectorization we allocate VECs of size 1. We
3361 allocate VEC_OPRNDS1 only in case of binary operation. */
3362 if (!slp_node)
3363 {
3364 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3365 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3366 }
3367 else if (scalar_shift_arg)
3368 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3369
3370 prev_stmt_info = NULL;
3371 for (j = 0; j < ncopies; j++)
3372 {
3373 /* Handle uses. */
3374 if (j == 0)
3375 {
3376 if (scalar_shift_arg)
3377 {
3378 /* Vector shl and shr insn patterns can be defined with scalar
3379 operand 2 (shift operand). In this case, use constant or loop
3380 invariant op1 directly, without extending it to vector mode
3381 first. */
3382 optab_op2_mode = insn_data[icode].operand[2].mode;
3383 if (!VECTOR_MODE_P (optab_op2_mode))
3384 {
73fbfcad 3385 if (dump_enabled_p ())
78c60e3d
SS
3386 dump_printf_loc (MSG_NOTE, vect_location,
3387 "operand 1 using scalar mode.");
9dc3f7de
IR
3388 vec_oprnd1 = op1;
3389 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3390 if (slp_node)
3391 {
3392 /* Store vec_oprnd1 for every vector stmt to be created
3393 for SLP_NODE. We check during the analysis that all
3394 the shift arguments are the same.
3395 TODO: Allow different constants for different vector
3396 stmts generated for an SLP instance. */
3397 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3398 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3399 }
3400 }
3401 }
3402
3403 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3404 (a special case for certain kinds of vector shifts); otherwise,
3405 operand 1 should be of a vector type (the usual case). */
3406 if (vec_oprnd1)
3407 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 3408 slp_node, -1);
9dc3f7de
IR
3409 else
3410 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 3411 slp_node, -1);
9dc3f7de
IR
3412 }
3413 else
3414 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3415
3416 /* Arguments are ready. Create the new vector stmt. */
3417 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3418 {
3419 vop1 = VEC_index (tree, vec_oprnds1, i);
3420 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3421 new_temp = make_ssa_name (vec_dest, new_stmt);
3422 gimple_assign_set_lhs (new_stmt, new_temp);
3423 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3424 if (slp_node)
3425 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3426 }
3427
3428 if (slp_node)
3429 continue;
3430
3431 if (j == 0)
3432 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3433 else
3434 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3435 prev_stmt_info = vinfo_for_stmt (new_stmt);
3436 }
3437
3438 VEC_free (tree, heap, vec_oprnds0);
3439 VEC_free (tree, heap, vec_oprnds1);
3440
3441 return true;
3442}
3443
3444
5deb57cb
JJ
3445static tree permute_vec_elements (tree, tree, tree, gimple,
3446 gimple_stmt_iterator *);
3447
3448
ebfd146a
IR
3449/* Function vectorizable_operation.
3450
16949072
RG
3451 Check if STMT performs a binary, unary or ternary operation that can
3452 be vectorized.
b8698a0f 3453 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3454 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3455 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3456
3457static bool
3458vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3459 gimple *vec_stmt, slp_tree slp_node)
3460{
00f07b86 3461 tree vec_dest;
ebfd146a 3462 tree scalar_dest;
16949072 3463 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 3464 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 3465 tree vectype;
ebfd146a
IR
3466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3467 enum tree_code code;
3468 enum machine_mode vec_mode;
3469 tree new_temp;
3470 int op_type;
00f07b86 3471 optab optab;
ebfd146a 3472 int icode;
ebfd146a
IR
3473 tree def;
3474 gimple def_stmt;
16949072
RG
3475 enum vect_def_type dt[3]
3476 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
3477 gimple new_stmt = NULL;
3478 stmt_vec_info prev_stmt_info;
b690cc0f 3479 int nunits_in;
ebfd146a
IR
3480 int nunits_out;
3481 tree vectype_out;
3482 int ncopies;
3483 int j, i;
16949072
RG
3484 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3485 tree vop0, vop1, vop2;
a70d6342
IR
3486 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3487 int vf;
3488
a70d6342 3489 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3490 return false;
3491
8644a673 3492 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3493 return false;
3494
3495 /* Is STMT a vectorizable binary/unary operation? */
3496 if (!is_gimple_assign (stmt))
3497 return false;
3498
3499 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3500 return false;
3501
ebfd146a
IR
3502 code = gimple_assign_rhs_code (stmt);
3503
3504 /* For pointer addition, we should use the normal plus for
3505 the vector addition. */
3506 if (code == POINTER_PLUS_EXPR)
3507 code = PLUS_EXPR;
3508
3509 /* Support only unary or binary operations. */
3510 op_type = TREE_CODE_LENGTH (code);
16949072 3511 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 3512 {
73fbfcad 3513 if (dump_enabled_p ())
78c60e3d
SS
3514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3515 "num. args = %d (not unary/binary/ternary op).",
3516 op_type);
ebfd146a
IR
3517 return false;
3518 }
3519
b690cc0f
RG
3520 scalar_dest = gimple_assign_lhs (stmt);
3521 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3522
7b7b1813
RG
3523 /* Most operations cannot handle bit-precision types without extra
3524 truncations. */
3525 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3526 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3527 /* The exceptions are the bitwise binary operations. */
3528 && code != BIT_IOR_EXPR
3529 && code != BIT_XOR_EXPR
3530 && code != BIT_AND_EXPR)
3531 {
73fbfcad 3532 if (dump_enabled_p ())
78c60e3d
SS
3533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3534 "bit-precision arithmetic not supported.");
7b7b1813
RG
3535 return false;
3536 }
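 /* Illustrative note, not part of the original sources: a bit-precision
    type here is e.g. a bit-field type with TYPE_PRECISION 3 held in a
    QImode container.  A vector PLUS_EXPR would operate on all 8 container
    bits and could leave stray bits above bit 2, so an extra truncation
    would be needed; IOR, XOR and AND never create bits outside the
    operands' precision, hence the exception above.  */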
3537
ebfd146a 3538 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3539 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 3540 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 3541 {
73fbfcad 3542 if (dump_enabled_p ())
78c60e3d
SS
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "use not simple.");
ebfd146a
IR
3545 return false;
3546 }
b690cc0f
RG
3547 /* If op0 is an external or constant def use a vector type with
3548 the same size as the output vector type. */
3549 if (!vectype)
3550 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3551 if (vec_stmt)
3552 gcc_assert (vectype);
3553 if (!vectype)
3554 {
73fbfcad 3555 if (dump_enabled_p ())
7d8930a0 3556 {
78c60e3d
SS
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "no vectype for scalar type ");
3559 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3560 TREE_TYPE (op0));
7d8930a0
IR
3561 }
3562
3563 return false;
3564 }
b690cc0f
RG
3565
3566 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3567 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3568 if (nunits_out != nunits_in)
3569 return false;
ebfd146a 3570
16949072 3571 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
3572 {
3573 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3574 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3575 &def, &dt[1]))
ebfd146a 3576 {
73fbfcad 3577 if (dump_enabled_p ())
78c60e3d
SS
3578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3579 "use not simple.");
ebfd146a
IR
3580 return false;
3581 }
3582 }
16949072
RG
3583 if (op_type == ternary_op)
3584 {
3585 op2 = gimple_assign_rhs3 (stmt);
24ee1384
IR
3586 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3587 &def, &dt[2]))
16949072 3588 {
73fbfcad 3589 if (dump_enabled_p ())
78c60e3d
SS
3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3591 "use not simple.");
16949072
RG
3592 return false;
3593 }
3594 }
ebfd146a 3595
b690cc0f
RG
3596 if (loop_vinfo)
3597 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3598 else
3599 vf = 1;
3600
3601 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3602 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 3603 case of SLP. */
437f4a00 3604 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
3605 ncopies = 1;
3606 else
3607 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3608
3609 gcc_assert (ncopies >= 1);
3610
9dc3f7de 3611 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
3612 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3613 || code == RROTATE_EXPR)
9dc3f7de 3614 return false;
ebfd146a 3615
ebfd146a 3616 /* Supportable by target? */
00f07b86
RH
3617
3618 vec_mode = TYPE_MODE (vectype);
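 /* Editorial note (an interpretation, not from the original sources):
    MULT_HIGHPART_EXPR has no single tree-code optab to query, so
    can_mult_highpart_p is asked whether the target can do it at all,
    presumably either directly or via widening multiplies plus a permute,
    and LAST_INSN_CODE is used below merely as a "supported" sentinel
    distinct from CODE_FOR_nothing.  */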
3619 if (code == MULT_HIGHPART_EXPR)
ebfd146a 3620 {
00f07b86 3621 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 3622 icode = LAST_INSN_CODE;
00f07b86
RH
3623 else
3624 icode = CODE_FOR_nothing;
ebfd146a 3625 }
00f07b86
RH
3626 else
3627 {
3628 optab = optab_for_tree_code (code, vectype, optab_default);
3629 if (!optab)
5deb57cb 3630 {
73fbfcad 3631 if (dump_enabled_p ())
78c60e3d
SS
3632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3633 "no optab.");
00f07b86 3634 return false;
5deb57cb 3635 }
00f07b86 3636 icode = (int) optab_handler (optab, vec_mode);
5deb57cb
JJ
3637 }
3638
ebfd146a
IR
3639 if (icode == CODE_FOR_nothing)
3640 {
73fbfcad 3641 if (dump_enabled_p ())
78c60e3d
SS
3642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3643 "op not supported by target.");
ebfd146a
IR
3644 /* Check only during analysis. */
3645 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 3646 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 3647 return false;
73fbfcad 3648 if (dump_enabled_p ())
78c60e3d 3649 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
383d9c83
IR
3650 }
3651
4a00c761 3652 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
3653 if (!VECTOR_MODE_P (vec_mode)
3654 && !vec_stmt
3655 && vf < vect_min_worthwhile_factor (code))
7d8930a0 3656 {
73fbfcad 3657 if (dump_enabled_p ())
78c60e3d
SS
3658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3659 "not worthwhile without SIMD support.");
e34842c6 3660 return false;
7d8930a0 3661 }
ebfd146a 3662
ebfd146a
IR
3663 if (!vec_stmt) /* transformation not required. */
3664 {
4a00c761 3665 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 3666 if (dump_enabled_p ())
78c60e3d
SS
3667 dump_printf_loc (MSG_NOTE, vect_location,
3668 "=== vectorizable_operation ===");
c3e7ee41 3669 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
3670 return true;
3671 }
3672
3673 /** Transform. **/
3674
73fbfcad 3675 if (dump_enabled_p ())
78c60e3d
SS
3676 dump_printf_loc (MSG_NOTE, vect_location,
3677 "transform binary/unary operation.");
383d9c83 3678
ebfd146a 3679 /* Handle def. */
00f07b86 3680 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 3681
ebfd146a
IR
3682 /* In case the vectorization factor (VF) is bigger than the number
3683 of elements that we can fit in a vectype (nunits), we have to generate
3684 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
3685 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3686 from one copy of the vector stmt to the next, in the field
3687 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3688 stages to find the correct vector defs to be used when vectorizing
3689 stmts that use the defs of the current stmt. The example below
3690 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3691 we need to create 4 vectorized stmts):
3692
3693 before vectorization:
3694 RELATED_STMT VEC_STMT
3695 S1: x = memref - -
3696 S2: z = x + 1 - -
3697
3698 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3699 there):
3700 RELATED_STMT VEC_STMT
3701 VS1_0: vx0 = memref0 VS1_1 -
3702 VS1_1: vx1 = memref1 VS1_2 -
3703 VS1_2: vx2 = memref2 VS1_3 -
3704 VS1_3: vx3 = memref3 - -
3705 S1: x = load - VS1_0
3706 S2: z = x + 1 - -
3707
3708 step2: vectorize stmt S2 (done here):
3709 To vectorize stmt S2 we first need to find the relevant vector
3710 def for the first operand 'x'. This is, as usual, obtained from
3711 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3712 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3713 relevant vector def 'vx0'. Having found 'vx0' we can generate
3714 the vector stmt VS2_0, and as usual, record it in the
3715 STMT_VINFO_VEC_STMT of stmt S2.
3716 When creating the second copy (VS2_1), we obtain the relevant vector
3717 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3718 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3719 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3720 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3721 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3722 chain of stmts and pointers:
3723 RELATED_STMT VEC_STMT
3724 VS1_0: vx0 = memref0 VS1_1 -
3725 VS1_1: vx1 = memref1 VS1_2 -
3726 VS1_2: vx2 = memref2 VS1_3 -
3727 VS1_3: vx3 = memref3 - -
3728 S1: x = load - VS1_0
3729 VS2_0: vz0 = vx0 + v1 VS2_1 -
3730 VS2_1: vz1 = vx1 + v1 VS2_2 -
3731 VS2_2: vz2 = vx2 + v1 VS2_3 -
3732 VS2_3: vz3 = vx3 + v1 - -
3733 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
3734
3735 prev_stmt_info = NULL;
3736 for (j = 0; j < ncopies; j++)
3737 {
3738 /* Handle uses. */
3739 if (j == 0)
4a00c761
JJ
3740 {
3741 if (op_type == binary_op || op_type == ternary_op)
3742 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3743 slp_node, -1);
3744 else
3745 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3746 slp_node, -1);
3747 if (op_type == ternary_op)
36ba4aae 3748 {
4a00c761
JJ
3749 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3750 VEC_quick_push (tree, vec_oprnds2,
3751 vect_get_vec_def_for_operand (op2, stmt, NULL));
36ba4aae 3752 }
4a00c761 3753 }
ebfd146a 3754 else
4a00c761
JJ
3755 {
3756 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3757 if (op_type == ternary_op)
3758 {
3759 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3760 VEC_quick_push (tree, vec_oprnds2,
3761 vect_get_vec_def_for_stmt_copy (dt[2],
3762 vec_oprnd));
3763 }
3764 }
3765
3766 /* Arguments are ready. Create the new vector stmt. */
3767 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
ebfd146a 3768 {
4a00c761
JJ
3769 vop1 = ((op_type == binary_op || op_type == ternary_op)
3770 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3771 vop2 = ((op_type == ternary_op)
3772 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
73804b12
RG
3773 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3774 vop0, vop1, vop2);
4a00c761
JJ
3775 new_temp = make_ssa_name (vec_dest, new_stmt);
3776 gimple_assign_set_lhs (new_stmt, new_temp);
3777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3778 if (slp_node)
3779 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
ebfd146a
IR
3780 }
3781
4a00c761
JJ
3782 if (slp_node)
3783 continue;
3784
3785 if (j == 0)
3786 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3787 else
3788 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3789 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
3790 }
3791
ebfd146a 3792 VEC_free (tree, heap, vec_oprnds0);
4a00c761
JJ
3793 if (vec_oprnds1)
3794 VEC_free (tree, heap, vec_oprnds1);
3795 if (vec_oprnds2)
3796 VEC_free (tree, heap, vec_oprnds2);
ebfd146a 3797
ebfd146a
IR
3798 return true;
3799}
3800
3801
3802/* Function vectorizable_store.
3803
b8698a0f
L
3804 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3805 can be vectorized.
3806 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3807 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3808 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3809
3810static bool
3811vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3812 slp_tree slp_node)
3813{
3814 tree scalar_dest;
3815 tree data_ref;
3816 tree op;
3817 tree vec_oprnd = NULL_TREE;
3818 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3819 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3820 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3821 tree elem_type;
ebfd146a 3822 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3823 struct loop *loop = NULL;
ebfd146a
IR
3824 enum machine_mode vec_mode;
3825 tree dummy;
3826 enum dr_alignment_support alignment_support_scheme;
3827 tree def;
3828 gimple def_stmt;
3829 enum vect_def_type dt;
3830 stmt_vec_info prev_stmt_info = NULL;
3831 tree dataref_ptr = NULL_TREE;
3832 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3833 int ncopies;
3834 int j;
3835 gimple next_stmt, first_stmt = NULL;
0d0293ac 3836 bool grouped_store = false;
272c6793 3837 bool store_lanes_p = false;
ebfd146a
IR
3838 unsigned int group_size, i;
3839 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3840 bool inv_p;
3841 VEC(tree,heap) *vec_oprnds = NULL;
3842 bool slp = (slp_node != NULL);
ebfd146a 3843 unsigned int vec_num;
a70d6342 3844 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3845 tree aggr_type;
a70d6342
IR
3846
3847 if (loop_vinfo)
3848 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
3849
3850 /* Multiple types in SLP are handled by creating the appropriate number of
3851 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3852 case of SLP. */
437f4a00 3853 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
3854 ncopies = 1;
3855 else
3856 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3857
3858 gcc_assert (ncopies >= 1);
3859
3860 /* FORNOW. This restriction should be relaxed. */
a70d6342 3861 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 3862 {
73fbfcad 3863 if (dump_enabled_p ())
78c60e3d
SS
3864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3865 "multiple types in nested loop.");
ebfd146a
IR
3866 return false;
3867 }
3868
a70d6342 3869 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3870 return false;
3871
8644a673 3872 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3873 return false;
3874
3875 /* Is vectorizable store? */
3876
3877 if (!is_gimple_assign (stmt))
3878 return false;
3879
3880 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
3881 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3882 && is_pattern_stmt_p (stmt_info))
3883 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a
IR
3884 if (TREE_CODE (scalar_dest) != ARRAY_REF
3885 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
3886 && TREE_CODE (scalar_dest) != COMPONENT_REF
3887 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
3888 && TREE_CODE (scalar_dest) != REALPART_EXPR
3889 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
3890 return false;
3891
3892 gcc_assert (gimple_assign_single_p (stmt));
3893 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
3894 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3895 &def, &dt))
ebfd146a 3896 {
73fbfcad 3897 if (dump_enabled_p ())
78c60e3d
SS
3898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3899 "use not simple.");
ebfd146a
IR
3900 return false;
3901 }
3902
272c6793 3903 elem_type = TREE_TYPE (vectype);
ebfd146a 3904 vec_mode = TYPE_MODE (vectype);
7b7b1813 3905
ebfd146a
IR
3906 /* FORNOW. In some cases we can vectorize even if the data type is not
3907 supported (e.g. array initialization with 0). */
947131ba 3908 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
3909 return false;
3910
3911 if (!STMT_VINFO_DATA_REF (stmt_info))
3912 return false;
3913
a7ce6ec3
RG
3914 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3915 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3916 size_zero_node) < 0)
a1e53f3f 3917 {
73fbfcad 3918 if (dump_enabled_p ())
78c60e3d
SS
3919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3920 "negative step for store.");
a1e53f3f
L
3921 return false;
3922 }
3923
0d0293ac 3924 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 3925 {
0d0293ac 3926 grouped_store = true;
e14c1050 3927 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
3928 if (!slp && !PURE_SLP_STMT (stmt_info))
3929 {
e14c1050 3930 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
3931 if (vect_store_lanes_supported (vectype, group_size))
3932 store_lanes_p = true;
0d0293ac 3933 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
3934 return false;
3935 }
b8698a0f 3936
ebfd146a
IR
3937 if (first_stmt == stmt)
3938 {
3939 /* STMT is the leader of the group. Check the operands of all the
3940 stmts of the group. */
e14c1050 3941 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
3942 while (next_stmt)
3943 {
3944 gcc_assert (gimple_assign_single_p (next_stmt));
3945 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
3946 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3947 &def_stmt, &def, &dt))
ebfd146a 3948 {
73fbfcad 3949 if (dump_enabled_p ())
78c60e3d
SS
3950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3951 "use not simple.");
ebfd146a
IR
3952 return false;
3953 }
e14c1050 3954 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
3955 }
3956 }
3957 }
3958
3959 if (!vec_stmt) /* transformation not required. */
3960 {
3961 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
3962 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3963 NULL, NULL, NULL);
ebfd146a
IR
3964 return true;
3965 }
3966
3967 /** Transform. **/
3968
0d0293ac 3969 if (grouped_store)
ebfd146a
IR
3970 {
3971 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 3972 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 3973
e14c1050 3974 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
3975
3976 /* FORNOW */
a70d6342 3977 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
3978
3979 /* We vectorize all the stmts of the interleaving group when we
3980 reach the last stmt in the group. */
e14c1050
IR
3981 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3982 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
3983 && !slp)
3984 {
3985 *vec_stmt = NULL;
3986 return true;
3987 }
3988
3989 if (slp)
4b5caab7 3990 {
0d0293ac 3991 grouped_store = false;
4b5caab7
IR
3992 /* VEC_NUM is the number of vect stmts to be created for this
3993 group. */
3994 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3995 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3996 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 3997 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 3998 }
ebfd146a 3999 else
4b5caab7
IR
4000 /* VEC_NUM is the number of vect stmts to be created for this
4001 group. */
ebfd146a
IR
4002 vec_num = group_size;
4003 }
b8698a0f 4004 else
ebfd146a
IR
4005 {
4006 first_stmt = stmt;
4007 first_dr = dr;
4008 group_size = vec_num = 1;
ebfd146a 4009 }
b8698a0f 4010
73fbfcad 4011 if (dump_enabled_p ())
78c60e3d
SS
4012 dump_printf_loc (MSG_NOTE, vect_location,
4013 "transform store. ncopies = %d", ncopies);
ebfd146a
IR
4014
4015 dr_chain = VEC_alloc (tree, heap, group_size);
4016 oprnds = VEC_alloc (tree, heap, group_size);
4017
720f5239 4018 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4019 gcc_assert (alignment_support_scheme);
272c6793
RS
4020 /* Targets with store-lane instructions must not require explicit
4021 realignment. */
4022 gcc_assert (!store_lanes_p
4023 || alignment_support_scheme == dr_aligned
4024 || alignment_support_scheme == dr_unaligned_supported);
4025
4026 if (store_lanes_p)
4027 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4028 else
4029 aggr_type = vectype;
ebfd146a
IR
4030
4031 /* In case the vectorization factor (VF) is bigger than the number
4032 of elements that we can fit in a vectype (nunits), we have to generate
4033 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 4034 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
4035 vect_get_vec_def_for_copy_stmt. */
4036
0d0293ac 4037 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4038
4039 S1: &base + 2 = x2
4040 S2: &base = x0
4041 S3: &base + 1 = x1
4042 S4: &base + 3 = x3
4043
4044 We create vectorized stores starting from base address (the access of the
4045 first stmt in the chain (S2 in the above example), when the last store stmt
4046 of the chain (S4) is reached:
4047
4048 VS1: &base = vx2
4049 VS2: &base + vec_size*1 = vx0
4050 VS3: &base + vec_size*2 = vx1
4051 VS4: &base + vec_size*3 = vx3
4052
4053 Then permutation statements are generated:
4054
3fcc1b55
JJ
4055 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4056 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 4057 ...
b8698a0f 4058
ebfd146a
IR
4059 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4060 (the order of the data-refs in the output of vect_permute_store_chain
4061 corresponds to the order of scalar stmts in the interleaving chain - see
4062 the documentation of vect_permute_store_chain()).
4063
4064 In case of both multiple types and interleaving, above vector stores and
ff802fa1 4065 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 4066 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 4067 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
4068 */
4069
4070 prev_stmt_info = NULL;
4071 for (j = 0; j < ncopies; j++)
4072 {
4073 gimple new_stmt;
4074 gimple ptr_incr;
4075
4076 if (j == 0)
4077 {
4078 if (slp)
4079 {
4080 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
4081 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4082 NULL, slp_node, -1);
ebfd146a
IR
4083
4084 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4085 }
4086 else
4087 {
b8698a0f
L
4088 /* For interleaved stores we collect vectorized defs for all the
4089 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4090 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
4091 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4092
0d0293ac 4093 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 4094 OPRNDS are of size 1. */
b8698a0f 4095 next_stmt = first_stmt;
ebfd146a
IR
4096 for (i = 0; i < group_size; i++)
4097 {
b8698a0f
L
4098 /* Since gaps are not supported for interleaved stores,
4099 GROUP_SIZE is the exact number of stmts in the chain.
4100 Therefore, NEXT_STMT can't be NULL_TREE. If there is
4101 no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
4102 iteration of the loop will be executed. */
4103 gcc_assert (next_stmt
4104 && gimple_assign_single_p (next_stmt));
4105 op = gimple_assign_rhs1 (next_stmt);
4106
b8698a0f 4107 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 4108 NULL);
b8698a0f
L
4109 VEC_quick_push (tree, dr_chain, vec_oprnd);
4110 VEC_quick_push (tree, oprnds, vec_oprnd);
e14c1050 4111 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
4112 }
4113 }
4114
4115 /* We should have caught mismatched types earlier. */
4116 gcc_assert (useless_type_conversion_p (vectype,
4117 TREE_TYPE (vec_oprnd)));
272c6793 4118 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
920e8172
RS
4119 NULL_TREE, &dummy, gsi,
4120 &ptr_incr, false, &inv_p);
a70d6342 4121 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 4122 }
b8698a0f 4123 else
ebfd146a 4124 {
b8698a0f
L
4125 /* For interleaved stores we created vectorized defs for all the
4126 defs stored in OPRNDS in the previous iteration (previous copy).
4127 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
4128 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4129 next copy.
0d0293ac 4130 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
4131 OPRNDS are of size 1. */
4132 for (i = 0; i < group_size; i++)
4133 {
4134 op = VEC_index (tree, oprnds, i);
24ee1384
IR
4135 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4136 &def, &dt);
b8698a0f 4137 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
ebfd146a
IR
4138 VEC_replace (tree, dr_chain, i, vec_oprnd);
4139 VEC_replace (tree, oprnds, i, vec_oprnd);
4140 }
272c6793
RS
4141 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4142 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
4143 }
4144
272c6793 4145 if (store_lanes_p)
ebfd146a 4146 {
272c6793 4147 tree vec_array;
267d3070 4148
272c6793
RS
4149 /* Combine all the vectors into an array. */
4150 vec_array = create_vector_array (vectype, vec_num);
4151 for (i = 0; i < vec_num; i++)
c2d7ab2a 4152 {
272c6793
RS
4153 vec_oprnd = VEC_index (tree, dr_chain, i);
4154 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 4155 }
b8698a0f 4156
272c6793
RS
4157 /* Emit:
4158 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
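 /* Editorial note (an interpretation, not from the original sources):
    IFN_STORE_LANES corresponds to the target's vec_store_lanes optab,
    i.e. structured stores such as NEON's vst2/vst3/vst4 that write
    GROUP_SIZE vectors with the interleaving done in hardware, so no
    vect_permute_store_chain step is needed on this path.  */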
4159 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4160 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4161 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 4162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4163 }
4164 else
4165 {
4166 new_stmt = NULL;
0d0293ac 4167 if (grouped_store)
272c6793
RS
4168 {
4169 result_chain = VEC_alloc (tree, heap, group_size);
4170 /* Permute. */
4171 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4172 &result_chain);
4173 }
c2d7ab2a 4174
272c6793
RS
4175 next_stmt = first_stmt;
4176 for (i = 0; i < vec_num; i++)
4177 {
644ffefd 4178 unsigned align, misalign;
272c6793
RS
4179
4180 if (i > 0)
4181 /* Bump the vector pointer. */
4182 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4183 stmt, NULL_TREE);
4184
4185 if (slp)
4186 vec_oprnd = VEC_index (tree, vec_oprnds, i);
0d0293ac
MM
4187 else if (grouped_store)
4188 /* For grouped stores vectorized defs are interleaved in
272c6793
RS
4189 vect_permute_store_chain(). */
4190 vec_oprnd = VEC_index (tree, result_chain, i);
4191
4192 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4193 build_int_cst (reference_alias_ptr_type
4194 (DR_REF (first_dr)), 0));
644ffefd 4195 align = TYPE_ALIGN_UNIT (vectype);
272c6793 4196 if (aligned_access_p (first_dr))
644ffefd 4197 misalign = 0;
272c6793
RS
4198 else if (DR_MISALIGNMENT (first_dr) == -1)
4199 {
4200 TREE_TYPE (data_ref)
4201 = build_aligned_type (TREE_TYPE (data_ref),
4202 TYPE_ALIGN (elem_type));
644ffefd
MJ
4203 align = TYPE_ALIGN_UNIT (elem_type);
4204 misalign = 0;
272c6793
RS
4205 }
4206 else
4207 {
4208 TREE_TYPE (data_ref)
4209 = build_aligned_type (TREE_TYPE (data_ref),
4210 TYPE_ALIGN (elem_type));
644ffefd 4211 misalign = DR_MISALIGNMENT (first_dr);
272c6793 4212 }
644ffefd
MJ
4213 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4214 misalign);
c2d7ab2a 4215
272c6793
RS
4216 /* Arguments are ready. Create the new vector stmt. */
4217 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4219
4220 if (slp)
4221 continue;
4222
e14c1050 4223 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
4224 if (!next_stmt)
4225 break;
4226 }
ebfd146a 4227 }
1da0876c
RS
4228 if (!slp)
4229 {
4230 if (j == 0)
4231 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4232 else
4233 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4234 prev_stmt_info = vinfo_for_stmt (new_stmt);
4235 }
ebfd146a
IR
4236 }
4237
b8698a0f
L
4238 VEC_free (tree, heap, dr_chain);
4239 VEC_free (tree, heap, oprnds);
ebfd146a 4240 if (result_chain)
b8698a0f 4241 VEC_free (tree, heap, result_chain);
ff802fa1
IR
4242 if (vec_oprnds)
4243 VEC_free (tree, heap, vec_oprnds);
ebfd146a
IR
4244
4245 return true;
4246}
4247
aec7ae7d
JJ
4248/* Given a vector type VECTYPE and permutation SEL returns
4249 the VECTOR_CST mask that implements the permutation of the
4250 vector elements. If that is impossible to do, returns NULL. */
a1e53f3f 4251
3fcc1b55
JJ
4252tree
4253vect_gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 4254{
d2a12ae7 4255 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 4256 int i, nunits;
a1e53f3f 4257
22e4dee7 4258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7
RH
4259
4260 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
a1e53f3f
L
4261 return NULL;
4262
96f9265a
RG
4263 mask_elt_type = lang_hooks.types.type_for_mode
4264 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 4265 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 4266
d2a12ae7 4267 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 4268 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
4269 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4270 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 4271
2635892a 4272 return mask_vec;
a1e53f3f
L
4273}
4274
aec7ae7d
JJ
4275/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4276 reversal of the vector elements. If that is impossible to do,
4277 returns NULL. */
4278
4279static tree
4280perm_mask_for_reverse (tree vectype)
4281{
4282 int i, nunits;
4283 unsigned char *sel;
4284
4285 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4286 sel = XALLOCAVEC (unsigned char, nunits);
4287
4288 for (i = 0; i < nunits; ++i)
4289 sel[i] = nunits - 1 - i;
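 /* Illustrative note, not part of the original sources: for a V4SI vector
    this builds the selector { 3, 2, 1, 0 }, i.e. a permutation mask that
    fully reverses the element order.  */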
4290
3fcc1b55 4291 return vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4292}
4293
4294/* Given a vector variable X and Y, that was generated for the scalar
4295 STMT, generate instructions to permute the vector elements of X and Y
4296 using permutation mask MASK_VEC, insert them at *GSI and return the
4297 permuted vector variable. */
a1e53f3f
L
4298
4299static tree
aec7ae7d
JJ
4300permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4301 gimple_stmt_iterator *gsi)
a1e53f3f
L
4302{
4303 tree vectype = TREE_TYPE (x);
aec7ae7d 4304 tree perm_dest, data_ref;
a1e53f3f
L
4305 gimple perm_stmt;
4306
a1e53f3f 4307 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
aec7ae7d 4308 data_ref = make_ssa_name (perm_dest, NULL);
a1e53f3f
L
4309
4310 /* Generate the permute statement. */
73804b12
RG
4311 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4312 x, y, mask_vec);
a1e53f3f
L
4313 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4314
4315 return data_ref;
4316}
4317
ebfd146a
IR
4318/* vectorizable_load.
4319
b8698a0f
L
4320 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4321 can be vectorized.
4322 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4323 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4324 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4325
4326static bool
4327vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4328 slp_tree slp_node, slp_instance slp_node_instance)
4329{
4330 tree scalar_dest;
4331 tree vec_dest = NULL;
4332 tree data_ref = NULL;
4333 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4334 stmt_vec_info prev_stmt_info;
ebfd146a 4335 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4336 struct loop *loop = NULL;
ebfd146a 4337 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4338 bool nested_in_vect_loop = false;
ebfd146a
IR
4339 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4340 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4341 tree elem_type;
ebfd146a 4342 tree new_temp;
947131ba 4343 enum machine_mode mode;
ebfd146a
IR
4344 gimple new_stmt = NULL;
4345 tree dummy;
4346 enum dr_alignment_support alignment_support_scheme;
4347 tree dataref_ptr = NULL_TREE;
4348 gimple ptr_incr;
4349 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4350 int ncopies;
4351 int i, j, group_size;
4352 tree msq = NULL_TREE, lsq;
4353 tree offset = NULL_TREE;
4354 tree realignment_token = NULL_TREE;
4355 gimple phi = NULL;
4356 VEC(tree,heap) *dr_chain = NULL;
0d0293ac 4357 bool grouped_load = false;
272c6793 4358 bool load_lanes_p = false;
ebfd146a 4359 gimple first_stmt;
ebfd146a 4360 bool inv_p;
319e6439 4361 bool negative = false;
ebfd146a
IR
4362 bool compute_in_loop = false;
4363 struct loop *at_loop;
4364 int vec_num;
4365 bool slp = (slp_node != NULL);
4366 bool slp_perm = false;
4367 enum tree_code code;
a70d6342
IR
4368 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4369 int vf;
272c6793 4370 tree aggr_type;
aec7ae7d
JJ
4371 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4372 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
7d75abc8 4373 tree stride_base, stride_step;
aec7ae7d
JJ
4374 int gather_scale = 1;
4375 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
4376
4377 if (loop_vinfo)
4378 {
4379 loop = LOOP_VINFO_LOOP (loop_vinfo);
4380 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4381 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4382 }
4383 else
3533e503 4384 vf = 1;
ebfd146a
IR
4385
4386 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4387 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4388 case of SLP. */
437f4a00 4389 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4390 ncopies = 1;
4391 else
4392 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4393
4394 gcc_assert (ncopies >= 1);
4395
4396 /* FORNOW. This restriction should be relaxed. */
4397 if (nested_in_vect_loop && ncopies > 1)
4398 {
73fbfcad 4399 if (dump_enabled_p ())
78c60e3d
SS
4400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4401 "multiple types in nested loop.");
ebfd146a
IR
4402 return false;
4403 }
4404
a70d6342 4405 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4406 return false;
4407
8644a673 4408 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4409 return false;
4410
4411 /* Is vectorizable load? */
4412 if (!is_gimple_assign (stmt))
4413 return false;
4414
4415 scalar_dest = gimple_assign_lhs (stmt);
4416 if (TREE_CODE (scalar_dest) != SSA_NAME)
4417 return false;
4418
4419 code = gimple_assign_rhs_code (stmt);
4420 if (code != ARRAY_REF
4421 && code != INDIRECT_REF
e9dbe7bb
IR
4422 && code != COMPONENT_REF
4423 && code != IMAGPART_EXPR
70f34814 4424 && code != REALPART_EXPR
42373e0b
RG
4425 && code != MEM_REF
4426 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
4427 return false;
4428
4429 if (!STMT_VINFO_DATA_REF (stmt_info))
4430 return false;
4431
7b7b1813 4432 elem_type = TREE_TYPE (vectype);
947131ba 4433 mode = TYPE_MODE (vectype);
ebfd146a
IR
4434
4435 /* FORNOW. In some cases we can vectorize even if the data type is not
4436 supported (e.g. data copies). */
947131ba 4437 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 4438 {
73fbfcad 4439 if (dump_enabled_p ())
78c60e3d
SS
4440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4441 "Aligned load, but unsupported type.");
ebfd146a
IR
4442 return false;
4443 }
4444
ebfd146a 4445 /* Check if the load is a part of an interleaving chain. */
0d0293ac 4446 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 4447 {
0d0293ac 4448 grouped_load = true;
ebfd146a 4449 /* FORNOW */
aec7ae7d 4450 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 4451
e14c1050 4452 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
4453 if (!slp && !PURE_SLP_STMT (stmt_info))
4454 {
e14c1050 4455 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
4456 if (vect_load_lanes_supported (vectype, group_size))
4457 load_lanes_p = true;
0d0293ac 4458 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
4459 return false;
4460 }
ebfd146a
IR
4461 }
4462
a1e53f3f 4463
aec7ae7d
JJ
4464 if (STMT_VINFO_GATHER_P (stmt_info))
4465 {
4466 gimple def_stmt;
4467 tree def;
4468 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4469 &gather_off, &gather_scale);
4470 gcc_assert (gather_decl);
24ee1384 4471 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
4472 &def_stmt, &def, &gather_dt,
4473 &gather_off_vectype))
4474 {
73fbfcad 4475 if (dump_enabled_p ())
78c60e3d
SS
4476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4477 "gather index use not simple.");
aec7ae7d
JJ
4478 return false;
4479 }
4480 }
7d75abc8
MM
4481 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4482 {
319e6439
RG
4483 if (!vect_check_strided_load (stmt, loop_vinfo,
4484 &stride_base, &stride_step))
4485 return false;
4486 }
4487 else
4488 {
4489 negative = tree_int_cst_compare (nested_in_vect_loop
4490 ? STMT_VINFO_DR_STEP (stmt_info)
4491 : DR_STEP (dr),
4492 size_zero_node) < 0;
4493 if (negative && ncopies > 1)
4494 {
73fbfcad 4495 if (dump_enabled_p ())
78c60e3d
SS
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4497 "multiple types with negative step.");
319e6439
RG
4498 return false;
4499 }
4500
4501 if (negative)
4502 {
4503 gcc_assert (!grouped_load);
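 /* Editorial note (an interpretation, not from the original sources):
    with a negative step the scalar accesses walk downwards through
    memory; a full vector is loaded starting at the lowest address of
    each group and its elements are then reversed with a VEC_PERM_EXPR,
    hence the two requirements checked below: a supported (mis)alignment
    scheme and an available reversal mask.  */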
4504 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4505 if (alignment_support_scheme != dr_aligned
4506 && alignment_support_scheme != dr_unaligned_supported)
4507 {
73fbfcad 4508 if (dump_enabled_p ())
78c60e3d
SS
4509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4510 "negative step but alignment required.");
319e6439
RG
4511 return false;
4512 }
4513 if (!perm_mask_for_reverse (vectype))
4514 {
73fbfcad 4515 if (dump_enabled_p ())
78c60e3d
SS
4516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4517 "negative step and reversing not supported.");
319e6439
RG
4518 return false;
4519 }
4520 }
7d75abc8 4521 }
aec7ae7d 4522
ebfd146a
IR
4523 if (!vec_stmt) /* transformation not required. */
4524 {
4525 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 4526 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
ebfd146a
IR
4527 return true;
4528 }
4529
73fbfcad 4530 if (dump_enabled_p ())
78c60e3d
SS
4531 dump_printf_loc (MSG_NOTE, vect_location,
4532 "transform load. ncopies = %d", ncopies);
ebfd146a
IR
4533
4534 /** Transform. **/
4535
aec7ae7d
JJ
4536 if (STMT_VINFO_GATHER_P (stmt_info))
4537 {
4538 tree vec_oprnd0 = NULL_TREE, op;
4539 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4540 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4541 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4542 edge pe = loop_preheader_edge (loop);
4543 gimple_seq seq;
4544 basic_block new_bb;
4545 enum { NARROW, NONE, WIDEN } modifier;
4546 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4547
4548 if (nunits == gather_off_nunits)
4549 modifier = NONE;
4550 else if (nunits == gather_off_nunits / 2)
4551 {
4552 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4553 modifier = WIDEN;
4554
4555 for (i = 0; i < gather_off_nunits; ++i)
4556 sel[i] = i | nunits;
4557
3fcc1b55 4558 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
aec7ae7d
JJ
4559 gcc_assert (perm_mask != NULL_TREE);
4560 }
4561 else if (nunits == gather_off_nunits * 2)
4562 {
4563 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4564 modifier = NARROW;
4565
4566 for (i = 0; i < nunits; ++i)
4567 sel[i] = i < gather_off_nunits
4568 ? i : i + nunits - gather_off_nunits;
4569
3fcc1b55 4570 perm_mask = vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4571 gcc_assert (perm_mask != NULL_TREE);
4572 ncopies *= 2;
4573 }
4574 else
4575 gcc_unreachable ();
4576
4577 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4578 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4579 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4580 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4581 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4582 scaletype = TREE_VALUE (arglist);
4583 gcc_checking_assert (types_compatible_p (srctype, rettype)
4584 && types_compatible_p (srctype, masktype));
4585
4586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4587
4588 ptr = fold_convert (ptrtype, gather_base);
4589 if (!is_gimple_min_invariant (ptr))
4590 {
4591 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4592 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4593 gcc_assert (!new_bb);
4594 }
4595
4596 /* Currently we support only unconditional gather loads,
4597 so the mask should be all ones. */
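 /* Editorial note (an interpretation, not from the original sources):
    an all-ones mask is materialized even when the builtin's mask type is
    a floating-point vector; every target word of the element is set to
    -1 below and the bit pattern is reinterpreted as a REAL_VALUE_TYPE,
    so each mask element has all bits set, which the gather treats as
    "lane enabled".  */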
4598 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4599 mask = build_int_cst (TREE_TYPE (masktype), -1);
4600 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4601 {
4602 REAL_VALUE_TYPE r;
4603 long tmp[6];
4604 for (j = 0; j < 6; ++j)
4605 tmp[j] = -1;
4606 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4607 mask = build_real (TREE_TYPE (masktype), r);
4608 }
4609 else
4610 gcc_unreachable ();
4611 mask = build_vector_from_val (masktype, mask);
4612 mask = vect_init_vector (stmt, mask, masktype, NULL);
4613
4614 scale = build_int_cst (scaletype, gather_scale);
4615
4616 prev_stmt_info = NULL;
4617 for (j = 0; j < ncopies; ++j)
4618 {
4619 if (modifier == WIDEN && (j & 1))
4620 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4621 perm_mask, stmt, gsi);
4622 else if (j == 0)
4623 op = vec_oprnd0
4624 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4625 else
4626 op = vec_oprnd0
4627 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4628
4629 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4630 {
4631 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4632 == TYPE_VECTOR_SUBPARTS (idxtype));
4633 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
4634 var = make_ssa_name (var, NULL);
4635 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4636 new_stmt
4637 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4638 op, NULL_TREE);
4639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4640 op = var;
4641 }
4642
4643 new_stmt
4644 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4645
4646 if (!useless_type_conversion_p (vectype, rettype))
4647 {
4648 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4649 == TYPE_VECTOR_SUBPARTS (rettype));
4650 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
4651 op = make_ssa_name (var, new_stmt);
4652 gimple_call_set_lhs (new_stmt, op);
4653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4654 var = make_ssa_name (vec_dest, NULL);
4655 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4656 new_stmt
4657 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4658 NULL_TREE);
4659 }
4660 else
4661 {
4662 var = make_ssa_name (vec_dest, new_stmt);
4663 gimple_call_set_lhs (new_stmt, var);
4664 }
4665
4666 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4667
4668 if (modifier == NARROW)
4669 {
4670 if ((j & 1) == 0)
4671 {
4672 prev_res = var;
4673 continue;
4674 }
4675 var = permute_vec_elements (prev_res, var,
4676 perm_mask, stmt, gsi);
4677 new_stmt = SSA_NAME_DEF_STMT (var);
4678 }
4679
4680 if (prev_stmt_info == NULL)
4681 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4682 else
4683 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4684 prev_stmt_info = vinfo_for_stmt (new_stmt);
4685 }
4686 return true;
4687 }
7d75abc8
MM
4688 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4689 {
4690 gimple_stmt_iterator incr_gsi;
4691 bool insert_after;
4692 gimple incr;
4693 tree offvar;
4694 tree ref = DR_REF (dr);
4695 tree ivstep;
4696 tree running_off;
4697 VEC(constructor_elt, gc) *v = NULL;
4698 gimple_seq stmts = NULL;
4699
4700 gcc_assert (stride_base && stride_step);
4701
4702 /* For a load with loop-invariant (but other than power-of-2)
4703 stride (i.e. not a grouped access) like so:
4704
4705 for (i = 0; i < n; i += stride)
4706 ... = array[i];
4707
4708 we generate a new induction variable and new accesses to
4709 form a new vector (or vectors, depending on ncopies):
4710
4711 for (j = 0; ; j += VF*stride)
4712 tmp1 = array[j];
4713 tmp2 = array[j + stride];
4714 ...
4715 vectemp = {tmp1, tmp2, ...}
4716 */
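 /* Illustrative instance, not part of the original sources (assumes
    VF == 4 and ncopies == 1): the code below emits four scalar loads,
    array[j], array[j + stride], array[j + 2*stride] and
    array[j + 3*stride], collects them into one CONSTRUCTOR that
    initializes the vector, and steps the induction variable by
    4 * stride per vector iteration (ivstep = stride_step * vf).  */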
4717
4718 ivstep = stride_step;
4719 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4720 build_int_cst (TREE_TYPE (ivstep), vf));
4721
4722 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4723
4724 create_iv (stride_base, ivstep, NULL,
4725 loop, &incr_gsi, insert_after,
4726 &offvar, NULL);
4727 incr = gsi_stmt (incr_gsi);
4728 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4729
4730 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4731 if (stmts)
4732 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4733
4734 prev_stmt_info = NULL;
4735 running_off = offvar;
4736 for (j = 0; j < ncopies; j++)
4737 {
4738 tree vec_inv;
4739
4740 v = VEC_alloc (constructor_elt, gc, nunits);
4741 for (i = 0; i < nunits; i++)
4742 {
4743 tree newref, newoff;
4744 gimple incr;
4745 if (TREE_CODE (ref) == ARRAY_REF)
cded4e9e
JJ
4746 {
4747 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4748 unshare_expr (TREE_OPERAND (ref, 0)),
4749 running_off,
4750 NULL_TREE, NULL_TREE);
4751 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4752 TREE_TYPE (newref)))
4753 newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
4754 newref);
4755 }
7d75abc8 4756 else
cded4e9e 4757 newref = build2 (MEM_REF, TREE_TYPE (vectype),
7d75abc8
MM
4758 running_off,
4759 TREE_OPERAND (ref, 1));
4760
4761 newref = force_gimple_operand_gsi (gsi, newref, true,
4762 NULL_TREE, true,
4763 GSI_SAME_STMT);
4764 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 4765 newoff = copy_ssa_name (running_off, NULL);
7d75abc8
MM
4766 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4767 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4768 running_off, stride_step);
4769 else
4770 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4771 running_off, stride_step);
7d75abc8
MM
4772 vect_finish_stmt_generation (stmt, incr, gsi);
4773
4774 running_off = newoff;
4775 }
4776
4777 vec_inv = build_constructor (vectype, v);
4778 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4779 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7d75abc8
MM
4780
4781 if (j == 0)
4782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4783 else
4784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4785 prev_stmt_info = vinfo_for_stmt (new_stmt);
4786 }
4787 return true;
4788 }
aec7ae7d 4789
0d0293ac 4790 if (grouped_load)
ebfd146a 4791 {
e14c1050 4792 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4
IR
4793 if (slp
4794 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4795 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4796 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4797
ebfd146a
IR
4798 /* Check if the chain of loads is already vectorized. */
4799 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4800 {
4801 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4802 return true;
4803 }
4804 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4805 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
4806
4807 /* VEC_NUM is the number of vect stmts to be created for this group. */
4808 if (slp)
4809 {
0d0293ac 4810 grouped_load = false;
ebfd146a 4811 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
a70d6342
IR
4812 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4813 slp_perm = true;
4814 }
ebfd146a
IR
4815 else
4816 vec_num = group_size;
ebfd146a
IR
4817 }
4818 else
4819 {
4820 first_stmt = stmt;
4821 first_dr = dr;
4822 group_size = vec_num = 1;
4823 }
4824
720f5239 4825 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4826 gcc_assert (alignment_support_scheme);
272c6793
RS
4827 /* Targets with load-lane instructions must not require explicit
4828 realignment. */
4829 gcc_assert (!load_lanes_p
4830 || alignment_support_scheme == dr_aligned
4831 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
4832
4833 /* In case the vectorization factor (VF) is bigger than the number
4834 of elements that we can fit in a vectype (nunits), we have to generate
4835 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 4836 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4837 from one copy of the vector stmt to the next, in the field
ff802fa1 4838 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4839 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
4840 stmts that use the defs of the current stmt. The example below
4841 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4842 need to create 4 vectorized stmts):
ebfd146a
IR
4843
4844 before vectorization:
4845 RELATED_STMT VEC_STMT
4846 S1: x = memref - -
4847 S2: z = x + 1 - -
4848
4849 step 1: vectorize stmt S1:
4850 We first create the vector stmt VS1_0, and, as usual, record a
4851 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4852 Next, we create the vector stmt VS1_1, and record a pointer to
4853 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4854 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
4855 stmts and pointers:
4856 RELATED_STMT VEC_STMT
4857 VS1_0: vx0 = memref0 VS1_1 -
4858 VS1_1: vx1 = memref1 VS1_2 -
4859 VS1_2: vx2 = memref2 VS1_3 -
4860 VS1_3: vx3 = memref3 - -
4861 S1: x = load - VS1_0
4862 S2: z = x + 1 - -
4863
b8698a0f
L
4864 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4865 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
4866 stmt S2. */
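
   /* Illustration only (a sketch using this file's accessors; use_vector_def
      is a hypothetical consumer, not a real function): a later stage can
      recover all vector copies of S1 by walking the chain recorded above:

	gimple vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (s1));
	while (vs)
	  {
	    use_vector_def (gimple_assign_lhs (vs));
	    vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));
	  }

      which visits VS1_0, VS1_1, VS1_2 and VS1_3 in order.  */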
4867
0d0293ac 4868 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4869
4870 S1: x2 = &base + 2
4871 S2: x0 = &base
4872 S3: x1 = &base + 1
4873 S4: x3 = &base + 3
4874
b8698a0f 4875 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4876 starting from the access of the first stmt of the chain:
4877
4878 VS1: vx0 = &base
4879 VS2: vx1 = &base + vec_size*1
 4880 VS3: vx2 = &base + vec_size*2
 4881 VS4: vx3 = &base + vec_size*3
4882
4883 Then permutation statements are generated:
4884
e2c83630
RH
4885 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4886 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
4887 ...
4888
4889 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4890 (the order of the data-refs in the output of vect_permute_load_chain
4891 corresponds to the order of scalar stmts in the interleaving chain - see
4892 the documentation of vect_permute_load_chain()).
4893 The generation of permutation stmts and recording them in
0d0293ac 4894 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4895
b8698a0f 4896 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
4897 permutation stmts above are created for every copy. The result vector
4898 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4899 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
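
   /* A concrete illustration (not taken from the code below): with a group
      of two interleaved accesses x[i] and y[i] laid out as {x0,y0,x1,y1,...}
      and a 4-element vectype, the loads above give

	vx0 = {x0,y0,x1,y1}    vx1 = {x2,y2,x3,y3}

      and the extract-even/odd permutations produce

	{x0,x1,x2,x3}  and  {y0,y1,y2,y3}

      which are the vector defs recorded for the two scalar stmts of the
      interleaving chain.  */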
ebfd146a
IR
4900
4901 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4902 on a target that supports unaligned accesses (dr_unaligned_supported)
4903 we generate the following code:
4904 p = initial_addr;
4905 indx = 0;
4906 loop {
4907 p = p + indx * vectype_size;
4908 vec_dest = *(p);
4909 indx = indx + 1;
4910 }
4911
4912 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4913 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
4914 then generate the following code, in which the data in each iteration is
4915 obtained by two vector loads, one from the previous iteration, and one
4916 from the current iteration:
4917 p1 = initial_addr;
4918 msq_init = *(floor(p1))
4919 p2 = initial_addr + VS - 1;
4920 realignment_token = call target_builtin;
4921 indx = 0;
4922 loop {
4923 p2 = p2 + indx * vectype_size
4924 lsq = *(floor(p2))
4925 vec_dest = realign_load (msq, lsq, realignment_token)
4926 indx = indx + 1;
4927 msq = lsq;
4928 } */
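
   /* Roughly, REALIGN_LOAD computes (a conceptual sketch, not target code):

	off = addr & (vector_size - 1);
	result = the vector_size bytes of (msq:lsq) starting at byte OFF;

      i.e. the two aligned loads msq and lsq straddle the misaligned address,
      and the realignment token (typically derived from the address) selects
      the bytes that belong to the requested vector.  */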
4929
4930 /* If the misalignment remains the same throughout the execution of the
4931 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 4932 preheader. Otherwise, they need to be created inside the loop.
ebfd146a
IR
4933 This can only occur when vectorizing memory accesses in the inner-loop
4934 nested within an outer-loop that is being vectorized. */
4935
d1e4b493 4936 if (nested_in_vect_loop
211bea38 4937 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
4938 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4939 {
4940 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4941 compute_in_loop = true;
4942 }
4943
4944 if ((alignment_support_scheme == dr_explicit_realign_optimized
4945 || alignment_support_scheme == dr_explicit_realign)
4946 && !compute_in_loop)
4947 {
4948 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4949 alignment_support_scheme, NULL_TREE,
4950 &at_loop);
4951 if (alignment_support_scheme == dr_explicit_realign_optimized)
4952 {
4953 phi = SSA_NAME_DEF_STMT (msq);
4954 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4955 }
4956 }
4957 else
4958 at_loop = loop;
4959
a1e53f3f
L
4960 if (negative)
4961 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4962
272c6793
RS
4963 if (load_lanes_p)
4964 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4965 else
4966 aggr_type = vectype;
4967
ebfd146a
IR
4968 prev_stmt_info = NULL;
4969 for (j = 0; j < ncopies; j++)
b8698a0f 4970 {
272c6793 4971 /* 1. Create the vector or array pointer update chain. */
ebfd146a 4972 if (j == 0)
272c6793 4973 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
920e8172
RS
4974 offset, &dummy, gsi,
4975 &ptr_incr, false, &inv_p);
ebfd146a 4976 else
272c6793
RS
4977 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4978 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 4979
0d0293ac 4980 if (grouped_load || slp_perm)
5ce1ee7f
RS
4981 dr_chain = VEC_alloc (tree, heap, vec_num);
4982
272c6793 4983 if (load_lanes_p)
ebfd146a 4984 {
272c6793
RS
4985 tree vec_array;
4986
4987 vec_array = create_vector_array (vectype, vec_num);
4988
4989 /* Emit:
4990 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4991 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4992 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4993 gimple_call_set_lhs (new_stmt, vec_array);
4994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 4995
272c6793
RS
4996 /* Extract each vector into an SSA_NAME. */
4997 for (i = 0; i < vec_num; i++)
ebfd146a 4998 {
272c6793
RS
4999 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5000 vec_array, i);
5001 VEC_quick_push (tree, dr_chain, new_temp);
5002 }
5003
5004 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 5005 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
5006 }
5007 else
5008 {
5009 for (i = 0; i < vec_num; i++)
5010 {
5011 if (i > 0)
5012 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5013 stmt, NULL_TREE);
5014
5015 /* 2. Create the vector-load in the loop. */
5016 switch (alignment_support_scheme)
5017 {
5018 case dr_aligned:
5019 case dr_unaligned_supported:
be1ac4ec 5020 {
644ffefd
MJ
5021 unsigned int align, misalign;
5022
272c6793
RS
5023 data_ref
5024 = build2 (MEM_REF, vectype, dataref_ptr,
5025 build_int_cst (reference_alias_ptr_type
5026 (DR_REF (first_dr)), 0));
644ffefd 5027 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
5028 if (alignment_support_scheme == dr_aligned)
5029 {
5030 gcc_assert (aligned_access_p (first_dr));
644ffefd 5031 misalign = 0;
272c6793
RS
5032 }
5033 else if (DR_MISALIGNMENT (first_dr) == -1)
5034 {
5035 TREE_TYPE (data_ref)
5036 = build_aligned_type (TREE_TYPE (data_ref),
5037 TYPE_ALIGN (elem_type));
644ffefd
MJ
5038 align = TYPE_ALIGN_UNIT (elem_type);
5039 misalign = 0;
272c6793
RS
5040 }
5041 else
5042 {
5043 TREE_TYPE (data_ref)
5044 = build_aligned_type (TREE_TYPE (data_ref),
5045 TYPE_ALIGN (elem_type));
644ffefd 5046 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5047 }
644ffefd
MJ
5048 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5049 align, misalign);
272c6793 5050 break;
be1ac4ec 5051 }
272c6793 5052 case dr_explicit_realign:
267d3070 5053 {
272c6793
RS
5054 tree ptr, bump;
5055 tree vs_minus_1;
5056
5057 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5058
5059 if (compute_in_loop)
5060 msq = vect_setup_realignment (first_stmt, gsi,
5061 &realignment_token,
5062 dr_explicit_realign,
5063 dataref_ptr, NULL);
5064
070ecdfd 5065 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5066 new_stmt = gimple_build_assign_with_ops
070ecdfd 5067 (BIT_AND_EXPR, ptr, dataref_ptr,
272c6793
RS
5068 build_int_cst
5069 (TREE_TYPE (dataref_ptr),
5070 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5071 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5072 data_ref
5073 = build2 (MEM_REF, vectype, ptr,
5074 build_int_cst (reference_alias_ptr_type
5075 (DR_REF (first_dr)), 0));
5076 vec_dest = vect_create_destination_var (scalar_dest,
5077 vectype);
5078 new_stmt = gimple_build_assign (vec_dest, data_ref);
5079 new_temp = make_ssa_name (vec_dest, new_stmt);
5080 gimple_assign_set_lhs (new_stmt, new_temp);
5081 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5082 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5083 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5084 msq = new_temp;
5085
5086 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5087 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
5088 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5089 new_stmt = gimple_build_assign_with_ops
5090 (BIT_AND_EXPR, NULL_TREE, ptr,
5091 build_int_cst
5092 (TREE_TYPE (ptr),
5093 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5094 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
5095 gimple_assign_set_lhs (new_stmt, ptr);
5096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5097 data_ref
5098 = build2 (MEM_REF, vectype, ptr,
5099 build_int_cst (reference_alias_ptr_type
5100 (DR_REF (first_dr)), 0));
5101 break;
267d3070 5102 }
272c6793 5103 case dr_explicit_realign_optimized:
070ecdfd 5104 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5105 new_stmt = gimple_build_assign_with_ops
070ecdfd 5106 (BIT_AND_EXPR, new_temp, dataref_ptr,
272c6793
RS
5107 build_int_cst
5108 (TREE_TYPE (dataref_ptr),
5109 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5110 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5111 data_ref
5112 = build2 (MEM_REF, vectype, new_temp,
5113 build_int_cst (reference_alias_ptr_type
5114 (DR_REF (first_dr)), 0));
5115 break;
5116 default:
5117 gcc_unreachable ();
5118 }
ebfd146a 5119 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5120 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
5121 new_temp = make_ssa_name (vec_dest, new_stmt);
5122 gimple_assign_set_lhs (new_stmt, new_temp);
5123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5124
272c6793
RS
5125 /* 3. Handle explicit realignment if necessary/supported.
5126 Create in loop:
5127 vec_dest = realign_load (msq, lsq, realignment_token) */
5128 if (alignment_support_scheme == dr_explicit_realign_optimized
5129 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5130 {
272c6793
RS
5131 lsq = gimple_assign_lhs (new_stmt);
5132 if (!realignment_token)
5133 realignment_token = dataref_ptr;
5134 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5135 new_stmt
73804b12
RG
5136 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5137 vec_dest, msq, lsq,
5138 realignment_token);
272c6793
RS
5139 new_temp = make_ssa_name (vec_dest, new_stmt);
5140 gimple_assign_set_lhs (new_stmt, new_temp);
5141 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5142
5143 if (alignment_support_scheme == dr_explicit_realign_optimized)
5144 {
5145 gcc_assert (phi);
5146 if (i == vec_num - 1 && j == ncopies - 1)
5147 add_phi_arg (phi, lsq,
5148 loop_latch_edge (containing_loop),
9e227d60 5149 UNKNOWN_LOCATION);
272c6793
RS
5150 msq = lsq;
5151 }
ebfd146a 5152 }
ebfd146a 5153
272c6793
RS
5154 /* 4. Handle invariant-load. */
5155 if (inv_p && !bb_vinfo)
ebfd146a 5156 {
ab70d825 5157 gimple_stmt_iterator gsi2 = *gsi;
0d0293ac 5158 gcc_assert (!grouped_load);
ab70d825 5159 gsi_next (&gsi2);
418b7df3 5160 new_temp = vect_init_vector (stmt, scalar_dest,
ab70d825
RG
5161 vectype, &gsi2);
5162 new_stmt = SSA_NAME_DEF_STMT (new_temp);
272c6793 5163 }
ebfd146a 5164
272c6793
RS
5165 if (negative)
5166 {
aec7ae7d
JJ
5167 tree perm_mask = perm_mask_for_reverse (vectype);
5168 new_temp = permute_vec_elements (new_temp, new_temp,
5169 perm_mask, stmt, gsi);
ebfd146a
IR
5170 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5171 }
267d3070 5172
272c6793 5173 /* Collect vector loads and later create their permutation in
0d0293ac
MM
5174 vect_transform_grouped_load (). */
5175 if (grouped_load || slp_perm)
272c6793 5176 VEC_quick_push (tree, dr_chain, new_temp);
267d3070 5177
272c6793
RS
5178 /* Store vector loads in the corresponding SLP_NODE. */
5179 if (slp && !slp_perm)
5180 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5181 new_stmt);
5182 }
ebfd146a
IR
5183 }
5184
5185 if (slp && !slp_perm)
5186 continue;
5187
5188 if (slp_perm)
5189 {
a70d6342 5190 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
ebfd146a
IR
5191 slp_node_instance, false))
5192 {
5193 VEC_free (tree, heap, dr_chain);
5194 return false;
5195 }
5196 }
5197 else
5198 {
0d0293ac 5199 if (grouped_load)
ebfd146a 5200 {
272c6793 5201 if (!load_lanes_p)
0d0293ac 5202 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5203 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5204 }
5205 else
5206 {
5207 if (j == 0)
5208 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5209 else
5210 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5211 prev_stmt_info = vinfo_for_stmt (new_stmt);
5212 }
5213 }
5ce1ee7f
RS
5214 if (dr_chain)
5215 VEC_free (tree, heap, dr_chain);
ebfd146a
IR
5216 }
5217
ebfd146a
IR
5218 return true;
5219}
5220
5221/* Function vect_is_simple_cond.
b8698a0f 5222
ebfd146a
IR
5223 Input:
5224 LOOP - the loop that is being vectorized.
5225 COND - Condition that is checked for simple use.
5226
e9e1d143
RG
5227 Output:
5228 *COMP_VECTYPE - the vector type for the comparison.
5229
ebfd146a
IR
5230 Returns whether a COND can be vectorized. Checks whether
 5231 condition operands are supportable using vect_is_simple_use. */
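
/* For example (an illustrative sketch): for a loop containing

     if (a_1 < b_2) ...

   where a_1 and b_2 are SSA names defined by vectorizable stmts in the
   loop, both operands pass vect_is_simple_use_1 and *COMP_VECTYPE becomes
   their vector type (e.g. V4SI for 32-bit ints and 128-bit vectors).  A
   COND whose SSA operand has an unsupported definition makes this function
   return false.  */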
5232
87aab9b2 5233static bool
24ee1384
IR
5234vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5235 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
5236{
5237 tree lhs, rhs;
5238 tree def;
5239 enum vect_def_type dt;
e9e1d143 5240 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
5241
5242 if (!COMPARISON_CLASS_P (cond))
5243 return false;
5244
5245 lhs = TREE_OPERAND (cond, 0);
5246 rhs = TREE_OPERAND (cond, 1);
5247
5248 if (TREE_CODE (lhs) == SSA_NAME)
5249 {
5250 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
5251 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5252 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
5253 return false;
5254 }
5255 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5256 && TREE_CODE (lhs) != FIXED_CST)
5257 return false;
5258
5259 if (TREE_CODE (rhs) == SSA_NAME)
5260 {
5261 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
5262 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5263 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
5264 return false;
5265 }
f7e531cf 5266 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
5267 && TREE_CODE (rhs) != FIXED_CST)
5268 return false;
5269
e9e1d143 5270 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
5271 return true;
5272}
5273
5274/* vectorizable_condition.
5275
b8698a0f
L
 5276 Check if STMT is a conditional modify expression that can be vectorized.
5277 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5278 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
5279 at GSI.
5280
5281 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5282 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
 5283 else clause if it is 2).
ebfd146a
IR
5284
5285 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
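
/* For example (illustration only): the scalar statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is, for a V4SI vectype, replaced by something along the lines of

     vect_x_5 = VEC_COND_EXPR <vect_a_1 < vect_b_2, vect_c_3, vect_d_4>;

   built below from vec_compare and vec_cond_expr.  */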
5286
4bbe8262 5287bool
ebfd146a 5288vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
5289 gimple *vec_stmt, tree reduc_def, int reduc_index,
5290 slp_tree slp_node)
ebfd146a
IR
5291{
5292 tree scalar_dest = NULL_TREE;
5293 tree vec_dest = NULL_TREE;
ebfd146a
IR
5294 tree cond_expr, then_clause, else_clause;
5295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5296 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5297 tree comp_vectype = NULL_TREE;
ff802fa1
IR
5298 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5299 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
5300 tree vec_compare, vec_cond_expr;
5301 tree new_temp;
5302 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5303 tree def;
a855b1b1 5304 enum vect_def_type dt, dts[4];
ebfd146a 5305 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5306 int ncopies;
ebfd146a 5307 enum tree_code code;
a855b1b1 5308 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
5309 int i, j;
5310 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5311 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5312 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
b8698a0f 5313
f7e531cf
IR
5314 if (slp_node || PURE_SLP_STMT (stmt_info))
5315 ncopies = 1;
5316 else
5317 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5318
ebfd146a 5319 gcc_assert (ncopies >= 1);
a855b1b1 5320 if (reduc_index && ncopies > 1)
ebfd146a
IR
5321 return false; /* FORNOW */
5322
f7e531cf
IR
5323 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5324 return false;
5325
5326 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5327 return false;
5328
4bbe8262
IR
5329 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5330 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5331 && reduc_def))
ebfd146a
IR
5332 return false;
5333
ebfd146a 5334 /* FORNOW: not yet supported. */
b8698a0f 5335 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5336 {
73fbfcad 5337 if (dump_enabled_p ())
78c60e3d
SS
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5339 "value used after loop.");
ebfd146a
IR
5340 return false;
5341 }
5342
5343 /* Is vectorizable conditional operation? */
5344 if (!is_gimple_assign (stmt))
5345 return false;
5346
5347 code = gimple_assign_rhs_code (stmt);
5348
5349 if (code != COND_EXPR)
5350 return false;
5351
4e71066d
RG
5352 cond_expr = gimple_assign_rhs1 (stmt);
5353 then_clause = gimple_assign_rhs2 (stmt);
5354 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5355
24ee1384
IR
5356 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5357 &comp_vectype)
e9e1d143 5358 || !comp_vectype)
ebfd146a
IR
5359 return false;
5360
5361 if (TREE_CODE (then_clause) == SSA_NAME)
5362 {
5363 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5364 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5365 &then_def_stmt, &def, &dt))
5366 return false;
5367 }
b8698a0f 5368 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
5369 && TREE_CODE (then_clause) != REAL_CST
5370 && TREE_CODE (then_clause) != FIXED_CST)
5371 return false;
5372
5373 if (TREE_CODE (else_clause) == SSA_NAME)
5374 {
5375 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5376 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5377 &else_def_stmt, &def, &dt))
5378 return false;
5379 }
b8698a0f 5380 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
5381 && TREE_CODE (else_clause) != REAL_CST
5382 && TREE_CODE (else_clause) != FIXED_CST)
5383 return false;
5384
b8698a0f 5385 if (!vec_stmt)
ebfd146a
IR
5386 {
5387 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5388 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5389 }
5390
f7e531cf
IR
5391 /* Transform. */
5392
5393 if (!slp_node)
5394 {
5395 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5396 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5397 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5398 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5399 }
ebfd146a
IR
5400
5401 /* Handle def. */
5402 scalar_dest = gimple_assign_lhs (stmt);
5403 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5404
5405 /* Handle cond expr. */
a855b1b1
MM
5406 for (j = 0; j < ncopies; j++)
5407 {
f7e531cf 5408 gimple new_stmt = NULL;
a855b1b1
MM
5409 if (j == 0)
5410 {
f7e531cf
IR
5411 if (slp_node)
5412 {
5413 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5414 VEC (slp_void_p, heap) *vec_defs;
5415
5416 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5417 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5418 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5419 VEC_safe_push (tree, heap, ops, then_clause);
5420 VEC_safe_push (tree, heap, ops, else_clause);
5421 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5422 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5423 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5424 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5425 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5426
5427 VEC_free (tree, heap, ops);
5428 VEC_free (slp_void_p, heap, vec_defs);
5429 }
5430 else
5431 {
5432 gimple gtemp;
5433 vec_cond_lhs =
a855b1b1
MM
5434 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5435 stmt, NULL);
24ee1384
IR
5436 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5437 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
5438
5439 vec_cond_rhs =
5440 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5441 stmt, NULL);
24ee1384
IR
5442 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5443 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
5444 if (reduc_index == 1)
5445 vec_then_clause = reduc_def;
5446 else
5447 {
5448 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5449 stmt, NULL);
24ee1384 5450 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
5451 NULL, &gtemp, &def, &dts[2]);
5452 }
5453 if (reduc_index == 2)
5454 vec_else_clause = reduc_def;
5455 else
5456 {
5457 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5458 stmt, NULL);
24ee1384 5459 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5460 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5461 }
a855b1b1
MM
5462 }
5463 }
5464 else
5465 {
f7e531cf
IR
5466 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5467 VEC_pop (tree, vec_oprnds0));
5468 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5469 VEC_pop (tree, vec_oprnds1));
a855b1b1 5470 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
f7e531cf 5471 VEC_pop (tree, vec_oprnds2));
a855b1b1 5472 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
f7e531cf
IR
5473 VEC_pop (tree, vec_oprnds3));
5474 }
5475
5476 if (!slp_node)
5477 {
5478 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5479 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5480 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5481 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
a855b1b1
MM
5482 }
5483
9dc3f7de 5484 /* Arguments are ready. Create the new vector stmt. */
f7e531cf
IR
5485 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5486 {
5487 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5488 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5489 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
a855b1b1 5490
f7e531cf
IR
5491 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5492 vec_cond_lhs, vec_cond_rhs);
5493 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5494 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5495
f7e531cf
IR
5496 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5497 new_temp = make_ssa_name (vec_dest, new_stmt);
5498 gimple_assign_set_lhs (new_stmt, new_temp);
5499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5500 if (slp_node)
5501 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5502 }
5503
5504 if (slp_node)
5505 continue;
5506
5507 if (j == 0)
5508 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5509 else
5510 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5511
5512 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5513 }
b8698a0f 5514
f7e531cf
IR
5515 VEC_free (tree, heap, vec_oprnds0);
5516 VEC_free (tree, heap, vec_oprnds1);
5517 VEC_free (tree, heap, vec_oprnds2);
5518 VEC_free (tree, heap, vec_oprnds3);
5519
ebfd146a
IR
5520 return true;
5521}
5522
5523
8644a673 5524/* Make sure the statement is vectorizable. */
ebfd146a
IR
5525
5526bool
a70d6342 5527vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5528{
8644a673 5529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5530 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5531 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5532 bool ok;
a70d6342 5533 tree scalar_type, vectype;
363477c0
JJ
5534 gimple pattern_stmt;
5535 gimple_seq pattern_def_seq;
ebfd146a 5536
73fbfcad 5537 if (dump_enabled_p ())
ebfd146a 5538 {
78c60e3d
SS
5539 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5540 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 5541 }
ebfd146a 5542
1825a1f3 5543 if (gimple_has_volatile_ops (stmt))
b8698a0f 5544 {
73fbfcad 5545 if (dump_enabled_p ())
78c60e3d
SS
5546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5547 "not vectorized: stmt has volatile operands");
1825a1f3
IR
5548
5549 return false;
5550 }
b8698a0f
L
5551
5552 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
5553 to include:
5554 - the COND_EXPR which is the loop exit condition
5555 - any LABEL_EXPRs in the loop
b8698a0f 5556 - computations that are used only for array indexing or loop control.
8644a673 5557 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5558 instance, therefore, all the statements are relevant.
ebfd146a 5559
d092494c 5560 Pattern statement needs to be analyzed instead of the original statement
83197f37 5561 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
5562 statements. In basic blocks we are called from some SLP instance
 5563 traversal; we don't analyze pattern stmts instead, since the pattern
 5564 stmts will already be part of the SLP instance. */
83197f37
IR
5565
5566 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5567 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5568 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5569 {
9d5e7640 5570 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5571 && pattern_stmt
9d5e7640
IR
5572 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5573 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5574 {
83197f37 5575 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
5576 stmt = pattern_stmt;
5577 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5578 if (dump_enabled_p ())
9d5e7640 5579 {
78c60e3d
SS
5580 dump_printf_loc (MSG_NOTE, vect_location,
5581 "==> examining pattern statement: ");
5582 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
5583 }
5584 }
5585 else
5586 {
73fbfcad 5587 if (dump_enabled_p ())
78c60e3d 5588 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
ebfd146a 5589
9d5e7640
IR
5590 return true;
5591 }
8644a673 5592 }
83197f37 5593 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5594 && node == NULL
83197f37
IR
5595 && pattern_stmt
5596 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5597 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5598 {
5599 /* Analyze PATTERN_STMT too. */
73fbfcad 5600 if (dump_enabled_p ())
83197f37 5601 {
78c60e3d
SS
5602 dump_printf_loc (MSG_NOTE, vect_location,
5603 "==> examining pattern statement: ");
5604 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
5605 }
5606
5607 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5608 return false;
5609 }
ebfd146a 5610
1107f3ae 5611 if (is_pattern_stmt_p (stmt_info)
079c527f 5612 && node == NULL
363477c0 5613 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5614 {
363477c0 5615 gimple_stmt_iterator si;
1107f3ae 5616
363477c0
JJ
5617 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5618 {
5619 gimple pattern_def_stmt = gsi_stmt (si);
5620 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5621 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5622 {
5623 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5624 if (dump_enabled_p ())
363477c0 5625 {
78c60e3d
SS
5626 dump_printf_loc (MSG_NOTE, vect_location,
5627 "==> examining pattern def statement: ");
5628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 5629 }
1107f3ae 5630
363477c0
JJ
5631 if (!vect_analyze_stmt (pattern_def_stmt,
5632 need_to_vectorize, node))
5633 return false;
5634 }
5635 }
5636 }
1107f3ae 5637
8644a673
IR
5638 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5639 {
5640 case vect_internal_def:
5641 break;
ebfd146a 5642
8644a673 5643 case vect_reduction_def:
7c5222ff 5644 case vect_nested_cycle:
a70d6342 5645 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5646 || relevance == vect_used_in_outer_by_reduction
a70d6342 5647 || relevance == vect_unused_in_scope));
8644a673
IR
5648 break;
5649
5650 case vect_induction_def:
5651 case vect_constant_def:
5652 case vect_external_def:
5653 case vect_unknown_def_type:
5654 default:
5655 gcc_unreachable ();
5656 }
ebfd146a 5657
a70d6342
IR
5658 if (bb_vinfo)
5659 {
5660 gcc_assert (PURE_SLP_STMT (stmt_info));
5661
b690cc0f 5662 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5663 if (dump_enabled_p ())
a70d6342 5664 {
78c60e3d
SS
5665 dump_printf_loc (MSG_NOTE, vect_location,
5666 "get vectype for scalar type: ");
5667 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
a70d6342
IR
5668 }
5669
5670 vectype = get_vectype_for_scalar_type (scalar_type);
5671 if (!vectype)
5672 {
73fbfcad 5673 if (dump_enabled_p ())
a70d6342 5674 {
78c60e3d
SS
5675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5676 "not SLPed: unsupported data-type ");
5677 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5678 scalar_type);
a70d6342
IR
5679 }
5680 return false;
5681 }
5682
73fbfcad 5683 if (dump_enabled_p ())
a70d6342 5684 {
78c60e3d
SS
5685 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5686 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
a70d6342
IR
5687 }
5688
5689 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5690 }
5691
8644a673 5692 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5693 {
8644a673
IR
5694 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5695 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5696 *need_to_vectorize = true;
ebfd146a
IR
5697 }
5698
8644a673 5699 ok = true;
b8698a0f 5700 if (!bb_vinfo
a70d6342
IR
5701 && (STMT_VINFO_RELEVANT_P (stmt_info)
5702 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5703 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5704 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
5705 || vectorizable_operation (stmt, NULL, NULL, NULL)
5706 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5707 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5708 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5709 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5710 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5711 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
5712 else
5713 {
5714 if (bb_vinfo)
4a00c761
JJ
5715 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5716 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5717 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
5718 || vectorizable_assignment (stmt, NULL, NULL, node)
5719 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5720 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
5721 || vectorizable_store (stmt, NULL, NULL, node)
5722 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5723 }
8644a673
IR
5724
5725 if (!ok)
ebfd146a 5726 {
73fbfcad 5727 if (dump_enabled_p ())
8644a673 5728 {
78c60e3d
SS
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5730 "not vectorized: relevant stmt not ");
5731 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5732 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5733 }
b8698a0f 5734
ebfd146a
IR
5735 return false;
5736 }
5737
a70d6342
IR
5738 if (bb_vinfo)
5739 return true;
5740
8644a673
IR
5741 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5742 need extra handling, except for vectorizable reductions. */
5743 if (STMT_VINFO_LIVE_P (stmt_info)
5744 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5745 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5746
8644a673 5747 if (!ok)
ebfd146a 5748 {
73fbfcad 5749 if (dump_enabled_p ())
8644a673 5750 {
78c60e3d
SS
5751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5752 "not vectorized: live stmt not ");
5753 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5754 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5755 }
b8698a0f 5756
8644a673 5757 return false;
ebfd146a
IR
5758 }
5759
ebfd146a
IR
5760 return true;
5761}
5762
5763
5764/* Function vect_transform_stmt.
5765
5766 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5767
5768bool
5769vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5770 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
5771 slp_instance slp_node_instance)
5772{
5773 bool is_store = false;
5774 gimple vec_stmt = NULL;
5775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5776 bool done;
ebfd146a
IR
5777
5778 switch (STMT_VINFO_TYPE (stmt_info))
5779 {
5780 case type_demotion_vec_info_type:
ebfd146a 5781 case type_promotion_vec_info_type:
ebfd146a
IR
5782 case type_conversion_vec_info_type:
5783 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5784 gcc_assert (done);
5785 break;
5786
5787 case induc_vec_info_type:
5788 gcc_assert (!slp_node);
5789 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5790 gcc_assert (done);
5791 break;
5792
9dc3f7de
IR
5793 case shift_vec_info_type:
5794 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5795 gcc_assert (done);
5796 break;
5797
ebfd146a
IR
5798 case op_vec_info_type:
5799 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5800 gcc_assert (done);
5801 break;
5802
5803 case assignment_vec_info_type:
5804 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5805 gcc_assert (done);
5806 break;
5807
5808 case load_vec_info_type:
b8698a0f 5809 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5810 slp_node_instance);
5811 gcc_assert (done);
5812 break;
5813
5814 case store_vec_info_type:
5815 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5816 gcc_assert (done);
0d0293ac 5817 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
5818 {
5819 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5820 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 5821 one are skipped, and their vec_stmt_info shouldn't be freed
5822 meanwhile. */
0d0293ac 5823 *grouped_store = true;
ebfd146a
IR
5824 if (STMT_VINFO_VEC_STMT (stmt_info))
5825 is_store = true;
5826 }
5827 else
5828 is_store = true;
5829 break;
5830
5831 case condition_vec_info_type:
f7e531cf 5832 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
5833 gcc_assert (done);
5834 break;
5835
5836 case call_vec_info_type:
190c2236 5837 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5838 stmt = gsi_stmt (*gsi);
ebfd146a
IR
5839 break;
5840
5841 case reduc_vec_info_type:
b5aeb3bb 5842 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
5843 gcc_assert (done);
5844 break;
5845
5846 default:
5847 if (!STMT_VINFO_LIVE_P (stmt_info))
5848 {
73fbfcad 5849 if (dump_enabled_p ())
78c60e3d
SS
5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5851 "stmt not supported.");
ebfd146a
IR
5852 gcc_unreachable ();
5853 }
5854 }
5855
5856 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5857 is being vectorized, but outside the immediately enclosing loop. */
5858 if (vec_stmt
a70d6342
IR
5859 && STMT_VINFO_LOOP_VINFO (stmt_info)
5860 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5861 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
5862 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5863 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5864 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5865 vect_used_in_outer_by_reduction))
ebfd146a 5866 {
a70d6342
IR
5867 struct loop *innerloop = LOOP_VINFO_LOOP (
5868 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
5869 imm_use_iterator imm_iter;
5870 use_operand_p use_p;
5871 tree scalar_dest;
5872 gimple exit_phi;
5873
73fbfcad 5874 if (dump_enabled_p ())
78c60e3d
SS
5875 dump_printf_loc (MSG_NOTE, vect_location,
5876 "Record the vdef for outer-loop vectorization.");
ebfd146a
IR
5877
 5878 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5879 (to be used when vectorizing outer-loop stmts that use the DEF of
5880 STMT). */
5881 if (gimple_code (stmt) == GIMPLE_PHI)
5882 scalar_dest = PHI_RESULT (stmt);
5883 else
5884 scalar_dest = gimple_assign_lhs (stmt);
5885
5886 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5887 {
5888 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5889 {
5890 exit_phi = USE_STMT (use_p);
5891 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5892 }
5893 }
5894 }
5895
5896 /* Handle stmts whose DEF is used outside the loop-nest that is
5897 being vectorized. */
5898 if (STMT_VINFO_LIVE_P (stmt_info)
5899 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5900 {
5901 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5902 gcc_assert (done);
5903 }
5904
5905 if (vec_stmt)
83197f37 5906 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 5907
b8698a0f 5908 return is_store;
ebfd146a
IR
5909}
5910
5911
b8698a0f 5912/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
5913 stmt_vec_info. */
5914
5915void
5916vect_remove_stores (gimple first_stmt)
5917{
5918 gimple next = first_stmt;
5919 gimple tmp;
5920 gimple_stmt_iterator next_si;
5921
5922 while (next)
5923 {
78048b1c
JJ
5924 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5925
5926 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5927 if (is_pattern_stmt_p (stmt_info))
5928 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
5929 /* Free the attached stmt_vec_info and remove the stmt. */
5930 next_si = gsi_for_stmt (next);
3d3f2249 5931 unlink_stmt_vdef (next);
ebfd146a 5932 gsi_remove (&next_si, true);
3d3f2249 5933 release_defs (next);
ebfd146a
IR
5934 free_stmt_vec_info (next);
5935 next = tmp;
5936 }
5937}
5938
5939
5940/* Function new_stmt_vec_info.
5941
5942 Create and initialize a new stmt_vec_info struct for STMT. */
5943
5944stmt_vec_info
b8698a0f 5945new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 5946 bb_vec_info bb_vinfo)
ebfd146a
IR
5947{
5948 stmt_vec_info res;
5949 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5950
5951 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5952 STMT_VINFO_STMT (res) = stmt;
5953 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 5954 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 5955 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
5956 STMT_VINFO_LIVE_P (res) = false;
5957 STMT_VINFO_VECTYPE (res) = NULL;
5958 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 5959 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
5960 STMT_VINFO_IN_PATTERN_P (res) = false;
5961 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 5962 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
5963 STMT_VINFO_DATA_REF (res) = NULL;
5964
5965 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5966 STMT_VINFO_DR_OFFSET (res) = NULL;
5967 STMT_VINFO_DR_INIT (res) = NULL;
5968 STMT_VINFO_DR_STEP (res) = NULL;
5969 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5970
5971 if (gimple_code (stmt) == GIMPLE_PHI
5972 && is_loop_header_bb_p (gimple_bb (stmt)))
5973 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5974 else
8644a673
IR
5975 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5976
c7e62a26 5977 STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
32e8bb8e 5978 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
5979 GROUP_FIRST_ELEMENT (res) = NULL;
5980 GROUP_NEXT_ELEMENT (res) = NULL;
5981 GROUP_SIZE (res) = 0;
5982 GROUP_STORE_COUNT (res) = 0;
5983 GROUP_GAP (res) = 0;
5984 GROUP_SAME_DR_STMT (res) = NULL;
5985 GROUP_READ_WRITE_DEPENDENCE (res) = false;
ebfd146a
IR
5986
5987 return res;
5988}
5989
5990
 5991/* Create a vector for stmt_vec_info. */
5992
5993void
5994init_stmt_vec_info_vec (void)
5995{
5996 gcc_assert (!stmt_vec_info_vec);
5997 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5998}
5999
6000
 6001/* Free the stmt_vec_info vector. */
6002
6003void
6004free_stmt_vec_info_vec (void)
6005{
6006 gcc_assert (stmt_vec_info_vec);
6007 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
6008}
6009
6010
6011/* Free stmt vectorization related info. */
6012
6013void
6014free_stmt_vec_info (gimple stmt)
6015{
6016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6017
6018 if (!stmt_info)
6019 return;
6020
78048b1c
JJ
6021 /* Check if this statement has a related "pattern stmt"
6022 (introduced by the vectorizer during the pattern recognition
6023 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6024 too. */
6025 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6026 {
6027 stmt_vec_info patt_info
6028 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6029 if (patt_info)
6030 {
363477c0
JJ
6031 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6032 if (seq)
6033 {
6034 gimple_stmt_iterator si;
6035 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6036 free_stmt_vec_info (gsi_stmt (si));
6037 }
78048b1c
JJ
6038 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6039 }
6040 }
6041
ebfd146a
IR
6042 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
6043 set_vinfo_for_stmt (stmt, NULL);
6044 free (stmt_info);
6045}
6046
6047
bb67d9c7 6048/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6049
bb67d9c7 6050 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
6051 by the target. */
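
/* For example (assuming a target with 128-bit vectors): SCALAR_TYPE "int"
   (4 bytes) with SIZE == 16 yields a 4-unit vector type (V4SImode), while
   SIZE == 0 lets the target pick its preferred SIMD mode.  Scalar types
   with no suitable vector mode, or whose alignment exceeds their size,
   yield NULL_TREE.  */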
6052
bb67d9c7
RG
6053static tree
6054get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
6055{
6056 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6057 enum machine_mode simd_mode;
2f816591 6058 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
6059 int nunits;
6060 tree vectype;
6061
cc4b5170 6062 if (nbytes == 0)
ebfd146a
IR
6063 return NULL_TREE;
6064
48f2e373
RB
6065 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6066 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6067 return NULL_TREE;
6068
7b7b1813
RG
6069 /* For vector types of elements whose mode precision doesn't
 6070 match their type's precision we use an element type of mode
6071 precision. The vectorization routines will have to make sure
48f2e373
RB
6072 they support the proper result truncation/extension.
6073 We also make sure to build vector types with INTEGER_TYPE
6074 component type only. */
6d7971b8 6075 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
6076 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6077 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
6078 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6079 TYPE_UNSIGNED (scalar_type));
6d7971b8 6080
ccbf5bb4
RG
6081 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6082 When the component mode passes the above test simply use a type
6083 corresponding to that mode. The theory is that any use that
6084 would cause problems with this will disable vectorization anyway. */
dfc2e2ac
RB
6085 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6086 && !INTEGRAL_TYPE_P (scalar_type)
6087 && !POINTER_TYPE_P (scalar_type))
60b95d28
RB
6088 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6089
6090 /* We can't build a vector type of elements with alignment bigger than
6091 their size. */
dfc2e2ac 6092 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
ccbf5bb4
RG
6093 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6094
dfc2e2ac
RB
 6095 /* If we fell back to using the mode, fail if there was
6096 no scalar type for it. */
6097 if (scalar_type == NULL_TREE)
6098 return NULL_TREE;
6099
bb67d9c7
RG
6100 /* If no size was supplied use the mode the target prefers. Otherwise
6101 lookup a vector mode of the specified size. */
6102 if (size == 0)
6103 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6104 else
6105 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
6106 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6107 if (nunits <= 1)
6108 return NULL_TREE;
ebfd146a
IR
6109
6110 vectype = build_vector_type (scalar_type, nunits);
73fbfcad 6111 if (dump_enabled_p ())
ebfd146a 6112 {
78c60e3d
SS
6113 dump_printf_loc (MSG_NOTE, vect_location,
6114 "get vectype with %d units of type ", nunits);
6115 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
ebfd146a
IR
6116 }
6117
6118 if (!vectype)
6119 return NULL_TREE;
6120
73fbfcad 6121 if (dump_enabled_p ())
ebfd146a 6122 {
78c60e3d
SS
6123 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6124 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
ebfd146a
IR
6125 }
6126
6127 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6128 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6129 {
73fbfcad 6130 if (dump_enabled_p ())
78c60e3d
SS
6131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6132 "mode not supported by target.");
ebfd146a
IR
6133 return NULL_TREE;
6134 }
6135
6136 return vectype;
6137}
6138
bb67d9c7
RG
6139unsigned int current_vector_size;
6140
6141/* Function get_vectype_for_scalar_type.
6142
6143 Returns the vector type corresponding to SCALAR_TYPE as supported
6144 by the target. */
6145
6146tree
6147get_vectype_for_scalar_type (tree scalar_type)
6148{
6149 tree vectype;
6150 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6151 current_vector_size);
6152 if (vectype
6153 && current_vector_size == 0)
6154 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6155 return vectype;
6156}
6157
b690cc0f
RG
6158/* Function get_same_sized_vectype
6159
 6160 Returns a vector type corresponding to SCALAR_TYPE and of the same
 6161 size as VECTOR_TYPE, if supported by the target. */
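
/* For example (illustration): with VECTOR_TYPE of mode V4SImode (16 bytes)
   and SCALAR_TYPE "short", the result is the 16-byte vector of shorts
   (V8HImode), or NULL_TREE if the target has no such mode.  */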
6162
6163tree
bb67d9c7 6164get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6165{
bb67d9c7
RG
6166 return get_vectype_for_scalar_type_and_size
6167 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
6168}
6169
ebfd146a
IR
6170/* Function vect_is_simple_use.
6171
6172 Input:
a70d6342
IR
6173 LOOP_VINFO - the vect info of the loop that is being vectorized.
6174 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6175 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
6176 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6177
6178 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6179 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6180 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6181 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
6182 is the case in reduction/induction computations).
6183 For basic blocks, supportable operands are constants and bb invariants.
6184 For now, operands defined outside the basic block are not supported. */
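
/* A small illustration of the classification done below:

     (outside the loop)  x_4 = ...;
     loop:
       a_1 = PHI <a_0 (preheader), a_2 (latch)>
       b_3 = a_1 + x_4;

   For operand x_4 of b_3, *DT becomes vect_external_def; for a_1 it is the
   def type recorded in the PHI's stmt_vec_info (e.g. vect_induction_def);
   and for a constant operand it becomes vect_constant_def.  */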
ebfd146a
IR
6185
6186bool
24ee1384 6187vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6188 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6189 tree *def, enum vect_def_type *dt)
b8698a0f 6190{
ebfd146a
IR
6191 basic_block bb;
6192 stmt_vec_info stmt_vinfo;
a70d6342 6193 struct loop *loop = NULL;
b8698a0f 6194
a70d6342
IR
6195 if (loop_vinfo)
6196 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
6197
6198 *def_stmt = NULL;
6199 *def = NULL_TREE;
b8698a0f 6200
73fbfcad 6201 if (dump_enabled_p ())
ebfd146a 6202 {
78c60e3d
SS
6203 dump_printf_loc (MSG_NOTE, vect_location,
6204 "vect_is_simple_use: operand ");
6205 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 6206 }
b8698a0f 6207
b758f602 6208 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
6209 {
6210 *dt = vect_constant_def;
6211 return true;
6212 }
b8698a0f 6213
ebfd146a
IR
6214 if (is_gimple_min_invariant (operand))
6215 {
6216 *def = operand;
8644a673 6217 *dt = vect_external_def;
ebfd146a
IR
6218 return true;
6219 }
6220
6221 if (TREE_CODE (operand) == PAREN_EXPR)
6222 {
73fbfcad 6223 if (dump_enabled_p ())
78c60e3d 6224 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
ebfd146a
IR
6225 operand = TREE_OPERAND (operand, 0);
6226 }
b8698a0f 6227
ebfd146a
IR
6228 if (TREE_CODE (operand) != SSA_NAME)
6229 {
73fbfcad 6230 if (dump_enabled_p ())
78c60e3d
SS
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6232 "not ssa-name.");
ebfd146a
IR
6233 return false;
6234 }
b8698a0f 6235
ebfd146a
IR
6236 *def_stmt = SSA_NAME_DEF_STMT (operand);
6237 if (*def_stmt == NULL)
6238 {
73fbfcad 6239 if (dump_enabled_p ())
78c60e3d
SS
6240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6241 "no def_stmt.");
ebfd146a
IR
6242 return false;
6243 }
6244
73fbfcad 6245 if (dump_enabled_p ())
ebfd146a 6246 {
78c60e3d
SS
6247 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6248 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
6249 }
6250
8644a673 6251 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
6252 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6253 if (gimple_nop_p (*def_stmt))
6254 {
6255 *def = operand;
8644a673 6256 *dt = vect_external_def;
ebfd146a
IR
6257 return true;
6258 }
6259
6260 bb = gimple_bb (*def_stmt);
a70d6342
IR
6261
6262 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6263 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6264 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6265 *dt = vect_external_def;
ebfd146a
IR
6266 else
6267 {
6268 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6269 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6270 }
6271
24ee1384
IR
6272 if (*dt == vect_unknown_def_type
6273 || (stmt
6274 && *dt == vect_double_reduction_def
6275 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6276 {
73fbfcad 6277 if (dump_enabled_p ())
78c60e3d
SS
6278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6279 "Unsupported pattern.");
ebfd146a
IR
6280 return false;
6281 }
6282
73fbfcad 6283 if (dump_enabled_p ())
78c60e3d 6284 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
ebfd146a
IR
6285
6286 switch (gimple_code (*def_stmt))
6287 {
6288 case GIMPLE_PHI:
6289 *def = gimple_phi_result (*def_stmt);
6290 break;
6291
6292 case GIMPLE_ASSIGN:
6293 *def = gimple_assign_lhs (*def_stmt);
6294 break;
6295
6296 case GIMPLE_CALL:
6297 *def = gimple_call_lhs (*def_stmt);
6298 if (*def != NULL)
6299 break;
6300 /* FALLTHRU */
6301 default:
73fbfcad 6302 if (dump_enabled_p ())
78c60e3d
SS
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6304 "unsupported defining stmt: ");
ebfd146a
IR
6305 return false;
6306 }
6307
6308 return true;
6309}
6310
b690cc0f
RG
6311/* Function vect_is_simple_use_1.
6312
 6313 Same as vect_is_simple_use but also determines the vector operand
6314 type of OPERAND and stores it to *VECTYPE. If the definition of
6315 OPERAND is vect_uninitialized_def, vect_constant_def or
6316 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6317 is responsible to compute the best suited vector type for the
6318 scalar operand. */
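
/* For example: if OPERAND is defined by a vectorizable load in the loop
   (*DT == vect_internal_def), *VECTYPE is the vectype already recorded for
   that load; for a constant or loop-invariant operand *VECTYPE is set to
   NULL_TREE and the caller chooses a vector type that matches the use.  */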
6319
6320bool
24ee1384 6321vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
6322 bb_vec_info bb_vinfo, gimple *def_stmt,
6323 tree *def, enum vect_def_type *dt, tree *vectype)
6324{
24ee1384
IR
6325 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6326 def, dt))
b690cc0f
RG
6327 return false;
6328
6329 /* Now get a vector type if the def is internal, otherwise supply
6330 NULL_TREE and leave it up to the caller to figure out a proper
6331 type for the use stmt. */
6332 if (*dt == vect_internal_def
6333 || *dt == vect_induction_def
6334 || *dt == vect_reduction_def
6335 || *dt == vect_double_reduction_def
6336 || *dt == vect_nested_cycle)
6337 {
6338 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
6339
6340 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6341 && !STMT_VINFO_RELEVANT (stmt_info)
6342 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 6343 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 6344
b690cc0f
RG
6345 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6346 gcc_assert (*vectype != NULL_TREE);
6347 }
6348 else if (*dt == vect_uninitialized_def
6349 || *dt == vect_constant_def
6350 || *dt == vect_external_def)
6351 *vectype = NULL_TREE;
6352 else
6353 gcc_unreachable ();
6354
6355 return true;
6356}
6357
ebfd146a
IR
6358
6359/* Function supportable_widening_operation
6360
b8698a0f
L
6361 Check whether an operation represented by the code CODE is a
6362 widening operation that is supported by the target platform in
b690cc0f
RG
6363 vector form (i.e., when operating on arguments of type VECTYPE_IN
6364 producing a result of type VECTYPE_OUT).
b8698a0f 6365
ebfd146a
IR
6366 Widening operations we currently support are NOP (CONVERT), FLOAT
6367 and WIDEN_MULT. This function checks if these operations are supported
6368 by the target platform either directly (via vector tree-codes), or via
6369 target builtins.
6370
6371 Output:
b8698a0f
L
6372 - CODE1 and CODE2 are codes of vector operations to be used when
6373 vectorizing the operation, if available.
ebfd146a
IR
6374 - MULTI_STEP_CVT determines the number of required intermediate steps in
6375 case of multi-step conversion (like char->short->int - in that case
6376 MULTI_STEP_CVT will be 1).
b8698a0f
L
6377 - INTERM_TYPES contains the intermediate type required to perform the
6378 widening operation (short in the above example). */
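
/* For example (illustration): widening a V16QI (char) operand to an int
   result with a NOP_EXPR conversion needs one intermediate step through
   short: on success *CODE1/*CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the V8HI intermediate type,
   provided the target implements the unpack optabs for both steps.  */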

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore do not allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
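      /* As an illustration of the reduction case above (hypothetical scalar
         source, not taken from any particular testcase):

           short *a, *b;
           int sum = 0;
           for (i = 0; i < n; i++)
             sum += (int) a[i] * (int) b[i];

         Only the final value of SUM is live after the loop, so producing
         the widened products in {even, odd} order rather than {lo, hi}
         order does not change the reduction result, and targets with
         mult_even/mult_odd patterns can use them directly.  */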
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

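  /* The _LO/_HI variants name halves of the input vector register, and
     which half holds the first scalar elements depends on endianness, so
     on big-endian targets the two codes are swapped.  The even/odd
     multiply pair is defined by element indices rather than subword
     position and is endian-neutral, hence its exclusion.  */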
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
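  /* Worked example (illustrative, assuming a typical 128-bit target):
     widening chars to ints, i.e. vectype = V16QI and wide_vectype = V4SI.
     The direct unpack patterns for V16QI produce V8HI rather than V4SI,
     so the loop below records the intermediate type "vector short" (V8HI)
     in *INTERM_TYPES, bumps *MULTI_STEP_CVT to 1, and then finds that
     unpacking V8HI does produce V4SI, at which point it returns true.  */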
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
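
/* An illustrative, hypothetical use of this routine (the variable names
   are made up for the example and are not taken from a real caller):

     enum tree_code code1;
     int multi_step_cvt;
     VEC (tree, heap) *interm_types = NULL;

     if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                          &code1, &multi_step_cvt,
                                          &interm_types))
       generate the narrowing in multi_step_cvt + 1 packing steps;

   For the int->short->char example above, with 128-bit vectors
   (V4SI down to V16QI), this would typically report
   code1 = VEC_PACK_TRUNC_EXPR, multi_step_cvt = 1 and
   interm_types = { vector short }, the packing going
   V4SI -> V8HI -> V16QI.  */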

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
     conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
     more costly than signed.  */
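  /* For example (illustrative): a multi-step double -> unsigned short
     conversion can go double -> signed int -> unsigned short.  The later
     packing steps discard the high-order bits anyway, so any value that
     fits in unsigned short survives the signed intermediate conversion
     unchanged, and the signed float-to-integer pattern is typically the
     cheaper one.  */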
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
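  /* Worked example (illustrative, assuming a typical 128-bit target):
     narrowing long long to char, i.e. vectype = V2DI and
     narrow_vectype = V16QI.  The packing chain is
     V2DI -> V4SI -> V8HI -> V16QI, so the loop below records
     "vector int" (V4SI) and "vector short" (V8HI) in *INTERM_TYPES,
     leaves *MULTI_STEP_CVT at 2, and returns true once the V8HI step is
     seen to produce V16QI.  */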
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}