/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */
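/* Usage sketch (illustrative only, not from the original sources): a caller
   accumulating body costs might do

     unsigned cost = record_stmt_cost (body_cost_vec, 2, vector_store,
                                       stmt_info, 0, vect_body);

   which appends the two stores to BODY_COST_VEC and returns
   2 * builtin_vectorization_cost (vector_store, vectype, 0) as the
   preliminary estimate, whereas passing a NULL cost vector hands the cost
   straight to the target model via add_stmt_cost.  */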

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside any pattern; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  worklist.release ();
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                worklist.release ();
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                worklist.release ();
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              worklist.release ();
              return false;
            }
        }
    } /* while worklist */

  worklist.release ();
  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
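/* For instance (an illustrative count derived from the loop below): a
   two-step promotion (PWR = 1) records vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 = 6 vec_promote_demote operations, whereas a two-step demotion
   records vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, since each promotion
   step doubles the number of result vectors.  */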

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */
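/* For example (illustrative): in a group of four interleaved stores, the
   call on the group's first statement returns 4 while the calls on the
   remaining three return 1, so the whole-group overhead is charged only
   once.  */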

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
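      /* A worked instance of the count below (illustrative numbers only):
         with group_size = 4 and ncopies = 2, nstmts
         = 2 * exact_log2 (4) * 4 = 2 * 2 * 4 = 16 vec_perm stmts.  */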

      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
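      /* For example (numbers for illustration only): a strided load of
         V4SI vectors with ncopies = 2 records 2 * 4 = 8 scalar_load stmts
         plus 2 vec_construct stmts to assemble the scalars into vectors.  */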
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
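/* A sketch of the effect (illustrative IL, not verbatim dump output): for
   scalar VAL 5 and a four-element integer vector TYPE, the preheader (or
   the point at GSI) gains roughly

     cst_1 = { 5, 5, 5, 5 };

   and the new SSA name (here cst_1) is returned for use in the
   vectorization of STMT.  */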

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0      VS1.1
                        VS1.1: vx.1 = memref1      VS1.2
                        VS1.2: vx.2 = memref2      VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ...  VSnew.1
                        VSnew.1: vz1 = vx.1 + ...  VSnew.2
                        VSnew.2: vz2 = vx.2 + ...  VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


1557/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1558 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a
IR
1559
1560static void
b8698a0f 1561vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1562 vec<tree> *vec_oprnds0,
1563 vec<tree> *vec_oprnds1)
ebfd146a 1564{
9771b263 1565 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1566
1567 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1568 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1569
9771b263 1570 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1571 {
9771b263 1572 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1573 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1574 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1575 }
1576}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
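
/* Typical use (a sketch of the pattern followed by the vectorizable_*
   routines below): the first copy of a stmt gets its defs here, and each
   further copy re-derives them via the *_for_stmt_copy variant:

       if (j == 0)
         vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                            slp_node, -1);
       else
         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
*/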

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
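
/* For example (a sketch of the virtual-operand update above): inserting a
   vectorized store VS before an existing store S that uses .MEM_1 rewires
   the virtual SSA chain locally instead of invoking the renamer:

       before:  S:  *p = x;     [.MEM_2 = VDEF <.MEM_1>]
       after:   VS: *vp = vx;   [.MEM_3 = VDEF <.MEM_1>]
                S:  *p = x;     [.MEM_2 = VDEF <.MEM_3>]
*/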

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
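
/* For instance (illustrative and target-specific): on a target whose
   builtin_vectorized_function hook maps BUILT_IN_SQRTF to a V4SF
   square-root builtin, the scalar call

       x = sqrtf (a);

   can be replaced by a single vector call computing four results at once.
   The hook returns NULL_TREE when no such mapping exists.  */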

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
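
  /* For instance (illustrative values, not tied to a particular target):
     with V4SI arguments and a V4SI result, nunits_in == nunits_out == 4
     and modifier == NONE, so one vector call replaces nunits scalar calls.
     With V2DF arguments and a V4SF result, nunits_in == 2 == nunits_out / 2
     and modifier == NARROW, so the NARROW path below feeds two input
     vectors to each vector call.  */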

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_call ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest, NULL);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
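
/* A sketch of the net effect of the scalar-stmt replacement above: after
   the vector call has been emitted, the original

       z_5 = foo (x_3);

   is rewritten to the harmless

       z_5 = 0;

   so z_5 keeps a definition until all of its uses have been rewritten,
   and later DCE can remove the assignment.  */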


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of operands, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
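
/* For example (illustrative): vectorizing a widening multiply
   short * short -> int with V8HI operands typically uses this helper
   twice, once per half of the result:

       vz.lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;   (CODE1)
       vz.hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;   (CODE2)

   each producing one V4SI vector.  */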


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
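
/* Note on the recursion depth (a worked count): each invocation pushes two
   defs, so a call with MULTI_STEP_CVT == vect_pow2 (m) - 1, as made by the
   NARROW path of vectorizable_conversion below, collects
   2 * vect_pow2 (m) == 2^(m+1) vector defs into VEC_OPRNDS.  */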


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
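
/* For example (illustrative): a two-step demotion int -> char with V4SI
   inputs first packs pairs of V4SI into V8HI, then pairs of V8HI into one
   V16QI, each level using VEC_PACK_TRUNC_EXPR and halving the number of
   operand vectors:

       4 x V4SI  ->  2 x V8HI  ->  1 x V16QI  */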


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
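
/* For example (illustrative): promoting V8HI operands one step yields two
   V4SI results per input vector, e.g. via

       vz.lo = VEC_UNPACK_LO_EXPR <vx>;   (CODE1)
       vz.hi = VEC_UNPACK_HI_EXPR <vx>;   (CODE2)

   so VEC_OPRNDS0 doubles in length at each step.  */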


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
          2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
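
/* A concrete multi-step example (illustrative): vectorizing char -> float
   on a target with 128-bit vectors goes through an intermediate integer
   type; the WIDEN path above first unpacks and then converts:

       V16QI  --(widen)-->  2 x V8HI  --(widen)-->  4 x V4SI
              --(FLOAT_EXPR)-->  4 x V4SF  */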


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
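
/* For example (sketch): a no-op conversion copy such as

       int_7 = (int) uint_3;

   with 32-bit elements is vectorized as a single

       vect_int.8 = VIEW_CONVERT_EXPR <vector(4) int> (vect_uint.5);

   since the bit-pattern is unchanged; only same-size, same-nunits
   conversions reach this point.  */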


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
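
/* Usage sketch (illustrative): pattern recognizers use this predicate to
   ask whether a shift they are about to synthesize is supported at all,
   without committing to the vector/scalar or vector/vector form:

       if (!vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
         return NULL;
*/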
3107
3108
9dc3f7de
IR
3109/* Function vectorizable_shift.
3110
3111 Check if STMT performs a shift operation that can be vectorized.
3112 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3113 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3114 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3115
3116static bool
3117vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3118 gimple *vec_stmt, slp_tree slp_node)
3119{
3120 tree vec_dest;
3121 tree scalar_dest;
3122 tree op0, op1 = NULL;
3123 tree vec_oprnd1 = NULL_TREE;
3124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3125 tree vectype;
3126 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3127 enum tree_code code;
3128 enum machine_mode vec_mode;
3129 tree new_temp;
3130 optab optab;
3131 int icode;
3132 enum machine_mode optab_op2_mode;
3133 tree def;
3134 gimple def_stmt;
3135 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3136 gimple new_stmt = NULL;
3137 stmt_vec_info prev_stmt_info;
3138 int nunits_in;
3139 int nunits_out;
3140 tree vectype_out;
cede2577 3141 tree op1_vectype;
9dc3f7de
IR
3142 int ncopies;
3143 int j, i;
6e1aa848
DN
3144 vec<tree> vec_oprnds0 = vNULL;
3145 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
3146 tree vop0, vop1;
3147 unsigned int k;
49eab32e 3148 bool scalar_shift_arg = true;
9dc3f7de
IR
3149 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3150 int vf;
3151
3152 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3153 return false;
3154
3155 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3156 return false;
3157
3158 /* Is STMT a vectorizable binary/unary operation? */
3159 if (!is_gimple_assign (stmt))
3160 return false;
3161
3162 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3163 return false;
3164
3165 code = gimple_assign_rhs_code (stmt);
3166
3167 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3168 || code == RROTATE_EXPR))
3169 return false;
3170
3171 scalar_dest = gimple_assign_lhs (stmt);
3172 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
3173 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3174 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3175 {
73fbfcad 3176 if (dump_enabled_p ())
78c60e3d 3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3178 "bit-precision shifts not supported.\n");
7b7b1813
RG
3179 return false;
3180 }
9dc3f7de
IR
3181
3182 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3183 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
3184 &def_stmt, &def, &dt[0], &vectype))
3185 {
73fbfcad 3186 if (dump_enabled_p ())
78c60e3d 3187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3188 "use not simple.\n");
9dc3f7de
IR
3189 return false;
3190 }
3191 /* If op0 is an external or constant def use a vector type with
3192 the same size as the output vector type. */
3193 if (!vectype)
3194 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3195 if (vec_stmt)
3196 gcc_assert (vectype);
3197 if (!vectype)
3198 {
73fbfcad 3199 if (dump_enabled_p ())
78c60e3d 3200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3201 "no vectype for scalar type\n");
9dc3f7de
IR
3202 return false;
3203 }
3204
3205 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3206 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3207 if (nunits_out != nunits_in)
3208 return false;
3209
3210 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3211 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3212 &def, &dt[1], &op1_vectype))
9dc3f7de 3213 {
73fbfcad 3214 if (dump_enabled_p ())
78c60e3d 3215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3216 "use not simple.\n");
9dc3f7de
IR
3217 return false;
3218 }
3219
3220 if (loop_vinfo)
3221 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3222 else
3223 vf = 1;
3224
3225 /* Multiple types in SLP are handled by creating the appropriate number of
3226 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3227 case of SLP. */
437f4a00 3228 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
3229 ncopies = 1;
3230 else
3231 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3232
3233 gcc_assert (ncopies >= 1);
3234
3235 /* Determine whether the shift amount is a vector, or scalar. If the
3236 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3237
49eab32e
JJ
3238 if (dt[1] == vect_internal_def && !slp_node)
3239 scalar_shift_arg = false;
3240 else if (dt[1] == vect_constant_def
3241 || dt[1] == vect_external_def
3242 || dt[1] == vect_internal_def)
3243 {
3244 /* In SLP, we need to check whether the shift count is the same for
3245 all the stmts; in loops, if the count is a constant or invariant,
3246 it is always a scalar shift. */
3247 if (slp_node)
3248 {
9771b263 3249 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
3250 gimple slpstmt;
3251
9771b263 3252 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
3253 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3254 scalar_shift_arg = false;
3255 }
3256 }
3257 else
3258 {
73fbfcad 3259 if (dump_enabled_p ())
78c60e3d 3260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3261 "operand mode requires invariant argument.\n");
49eab32e
JJ
3262 return false;
3263 }
3264
9dc3f7de 3265 /* Vector shifted by vector. */
49eab32e 3266 if (!scalar_shift_arg)
9dc3f7de
IR
3267 {
3268 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 3269 if (dump_enabled_p ())
78c60e3d 3270 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3271 "vector/vector shift/rotate found.\n");
78c60e3d 3272
aa948027
JJ
3273 if (!op1_vectype)
3274 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3275 if (op1_vectype == NULL_TREE
3276 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 3277 {
73fbfcad 3278 if (dump_enabled_p ())
78c60e3d
SS
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3280 "unusable type for last operand in"
e645e942 3281 " vector/vector shift/rotate.\n");
cede2577
JJ
3282 return false;
3283 }
9dc3f7de
IR
3284 }
3285 /* See if the machine has a vector-shift-by-scalar insn, and if not,
3286 see if it has a vector-shift-by-vector insn. */
49eab32e 3287 else
9dc3f7de
IR
3288 {
3289 optab = optab_for_tree_code (code, vectype, optab_scalar);
3290 if (optab
3291 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3292 {
73fbfcad 3293 if (dump_enabled_p ())
78c60e3d 3294 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3295 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
3296 }
3297 else
3298 {
3299 optab = optab_for_tree_code (code, vectype, optab_vector);
3300 if (optab
3301 && (optab_handler (optab, TYPE_MODE (vectype))
3302 != CODE_FOR_nothing))
3303 {
49eab32e
JJ
3304 scalar_shift_arg = false;
3305
73fbfcad 3306 if (dump_enabled_p ())
78c60e3d 3307 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3308 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
3309
3310 /* Unlike the other binary operators, shifts/rotates take an rhs of
3311 type int rather than of the same type as the lhs, so make
3312 sure the scalar has the right type if we are
aa948027 3313 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
3314 if (dt[1] == vect_constant_def)
3315 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
3316 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3317 TREE_TYPE (op1)))
3318 {
3319 if (slp_node
3320 && TYPE_MODE (TREE_TYPE (vectype))
3321 != TYPE_MODE (TREE_TYPE (op1)))
3322 {
73fbfcad 3323 if (dump_enabled_p ())
78c60e3d
SS
3324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3325 "unusable type for last operand in"
e645e942 3326 " vector/vector shift/rotate.\n");
aa948027
JJ
3327 return false;
3328 }
3329 if (vec_stmt && !slp_node)
3330 {
3331 op1 = fold_convert (TREE_TYPE (vectype), op1);
3332 op1 = vect_init_vector (stmt, op1,
3333 TREE_TYPE (vectype), NULL);
3334 }
3335 }
9dc3f7de
IR
3336 }
3337 }
3338 }
9dc3f7de
IR
3339
3340 /* Supportable by target? */
3341 if (!optab)
3342 {
73fbfcad 3343 if (dump_enabled_p ())
78c60e3d 3344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3345 "no optab.\n");
9dc3f7de
IR
3346 return false;
3347 }
3348 vec_mode = TYPE_MODE (vectype);
3349 icode = (int) optab_handler (optab, vec_mode);
3350 if (icode == CODE_FOR_nothing)
3351 {
73fbfcad 3352 if (dump_enabled_p ())
78c60e3d 3353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3354 "op not supported by target.\n");
9dc3f7de
IR
3355 /* Check only during analysis. */
3356 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3357 || (vf < vect_min_worthwhile_factor (code)
3358 && !vec_stmt))
3359 return false;
73fbfcad 3360 if (dump_enabled_p ())
e645e942
TJ
3361 dump_printf_loc (MSG_NOTE, vect_location,
3362 "proceeding using word mode.\n");
9dc3f7de
IR
3363 }
3364
3365 /* Worthwhile without SIMD support? Check only during analysis. */
3366 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3367 && vf < vect_min_worthwhile_factor (code)
3368 && !vec_stmt)
3369 {
73fbfcad 3370 if (dump_enabled_p ())
78c60e3d 3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3372 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
3373 return false;
3374 }
3375
3376 if (!vec_stmt) /* transformation not required. */
3377 {
3378 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 3379 if (dump_enabled_p ())
e645e942
TJ
3380 dump_printf_loc (MSG_NOTE, vect_location,
3381 "=== vectorizable_shift ===\n");
c3e7ee41 3382 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
9dc3f7de
IR
3383 return true;
3384 }
3385
3386 /** Transform. **/
3387
73fbfcad 3388 if (dump_enabled_p ())
78c60e3d 3389 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3390 "transform binary/unary operation.\n");
9dc3f7de
IR
3391
3392 /* Handle def. */
3393 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3394
9dc3f7de
IR
3395 prev_stmt_info = NULL;
3396 for (j = 0; j < ncopies; j++)
3397 {
3398 /* Handle uses. */
3399 if (j == 0)
3400 {
3401 if (scalar_shift_arg)
3402 {
3403 /* Vector shl and shr insn patterns can be defined with scalar
3404 operand 2 (shift operand). In this case, use constant or loop
3405 invariant op1 directly, without extending it to vector mode
3406 first. */
3407 optab_op2_mode = insn_data[icode].operand[2].mode;
3408 if (!VECTOR_MODE_P (optab_op2_mode))
3409 {
73fbfcad 3410 if (dump_enabled_p ())
78c60e3d 3411 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3412 "operand 1 using scalar mode.\n");
9dc3f7de 3413 vec_oprnd1 = op1;
8930f723 3414 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 3415 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
3416 if (slp_node)
3417 {
3418 /* Store vec_oprnd1 for every vector stmt to be created
3419 for SLP_NODE. We check during the analysis that all
3420 the shift arguments are the same.
3421 TODO: Allow different constants for different vector
3422 stmts generated for an SLP instance. */
3423 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 3424 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
3425 }
3426 }
3427 }
3428
3429 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3430 (a special case for certain kinds of vector shifts); otherwise,
3431 operand 1 should be of a vector type (the usual case). */
3432 if (vec_oprnd1)
3433 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 3434 slp_node, -1);
9dc3f7de
IR
3435 else
3436 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 3437 slp_node, -1);
9dc3f7de
IR
3438 }
3439 else
3440 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3441
3442 /* Arguments are ready. Create the new vector stmt. */
9771b263 3443 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 3444 {
9771b263 3445 vop1 = vec_oprnds1[i];
9dc3f7de
IR
3446 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3447 new_temp = make_ssa_name (vec_dest, new_stmt);
3448 gimple_assign_set_lhs (new_stmt, new_temp);
3449 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3450 if (slp_node)
9771b263 3451 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
3452 }
3453
3454 if (slp_node)
3455 continue;
3456
3457 if (j == 0)
3458 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3459 else
3460 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3461 prev_stmt_info = vinfo_for_stmt (new_stmt);
3462 }
3463
9771b263
DN
3464 vec_oprnds0.release ();
3465 vec_oprnds1.release ();
9dc3f7de
IR
3466
3467 return true;
3468}
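
/* Illustrative sketch (an editorial addition, not part of the original
   sources): with a 4-lane vector type, a loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] << 5;

   is a scalar-shift-argument case; if the target's shift pattern takes
   a scalar operand 2, it conceptually becomes

     for (i = 0; i < n; i += 4)
       va[i/4] = vb[i/4] << 5;

   while a per-element count such as a[i] = b[i] << c[i] needs the
   vector/vector optab checked above.  */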
3469
3470
5deb57cb
JJ
3471static tree permute_vec_elements (tree, tree, tree, gimple,
3472 gimple_stmt_iterator *);
3473
3474
ebfd146a
IR
3475/* Function vectorizable_operation.
3476
16949072
RG
3477 Check if STMT performs a binary, unary or ternary operation that can
3478 be vectorized.
b8698a0f 3479 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3480 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3481 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3482
3483static bool
3484vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3485 gimple *vec_stmt, slp_tree slp_node)
3486{
00f07b86 3487 tree vec_dest;
ebfd146a 3488 tree scalar_dest;
16949072 3489 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 3490 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 3491 tree vectype;
ebfd146a
IR
3492 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3493 enum tree_code code;
3494 enum machine_mode vec_mode;
3495 tree new_temp;
3496 int op_type;
00f07b86 3497 optab optab;
ebfd146a 3498 int icode;
ebfd146a
IR
3499 tree def;
3500 gimple def_stmt;
16949072
RG
3501 enum vect_def_type dt[3]
3502 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
3503 gimple new_stmt = NULL;
3504 stmt_vec_info prev_stmt_info;
b690cc0f 3505 int nunits_in;
ebfd146a
IR
3506 int nunits_out;
3507 tree vectype_out;
3508 int ncopies;
3509 int j, i;
6e1aa848
DN
3510 vec<tree> vec_oprnds0 = vNULL;
3511 vec<tree> vec_oprnds1 = vNULL;
3512 vec<tree> vec_oprnds2 = vNULL;
16949072 3513 tree vop0, vop1, vop2;
a70d6342
IR
3514 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3515 int vf;
3516
a70d6342 3517 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3518 return false;
3519
8644a673 3520 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3521 return false;
3522
3523 /* Is STMT a vectorizable binary/unary/ternary operation? */
3524 if (!is_gimple_assign (stmt))
3525 return false;
3526
3527 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3528 return false;
3529
ebfd146a
IR
3530 code = gimple_assign_rhs_code (stmt);
3531
3532 /* For pointer addition, we should use the normal plus for
3533 the vector addition. */
3534 if (code == POINTER_PLUS_EXPR)
3535 code = PLUS_EXPR;
3536
3537 /* Support only unary, binary, or ternary operations. */
3538 op_type = TREE_CODE_LENGTH (code);
16949072 3539 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 3540 {
73fbfcad 3541 if (dump_enabled_p ())
78c60e3d 3542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3543 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 3544 op_type);
ebfd146a
IR
3545 return false;
3546 }
3547
b690cc0f
RG
3548 scalar_dest = gimple_assign_lhs (stmt);
3549 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3550
7b7b1813
RG
3551 /* Most operations cannot handle bit-precision types without extra
3552 truncations. */
3553 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3554 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3555 /* Exceptions are bitwise binary operations. */
3556 && code != BIT_IOR_EXPR
3557 && code != BIT_XOR_EXPR
3558 && code != BIT_AND_EXPR)
3559 {
73fbfcad 3560 if (dump_enabled_p ())
78c60e3d 3561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3562 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
3563 return false;
3564 }
3565
ebfd146a 3566 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3567 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 3568 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 3569 {
73fbfcad 3570 if (dump_enabled_p ())
78c60e3d 3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3572 "use not simple.\n");
ebfd146a
IR
3573 return false;
3574 }
b690cc0f
RG
3575 /* If op0 is an external or constant def, use a vector type with
3576 the same size as the output vector type. */
3577 if (!vectype)
3578 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3579 if (vec_stmt)
3580 gcc_assert (vectype);
3581 if (!vectype)
3582 {
73fbfcad 3583 if (dump_enabled_p ())
7d8930a0 3584 {
78c60e3d
SS
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3586 "no vectype for scalar type ");
3587 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3588 TREE_TYPE (op0));
e645e942 3589 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3590 }
3591
3592 return false;
3593 }
b690cc0f
RG
3594
3595 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3596 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3597 if (nunits_out != nunits_in)
3598 return false;
ebfd146a 3599
16949072 3600 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
3601 {
3602 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3603 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3604 &def, &dt[1]))
ebfd146a 3605 {
73fbfcad 3606 if (dump_enabled_p ())
78c60e3d 3607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3608 "use not simple.\n");
ebfd146a
IR
3609 return false;
3610 }
3611 }
16949072
RG
3612 if (op_type == ternary_op)
3613 {
3614 op2 = gimple_assign_rhs3 (stmt);
24ee1384
IR
3615 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3616 &def, &dt[2]))
16949072 3617 {
73fbfcad 3618 if (dump_enabled_p ())
78c60e3d 3619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3620 "use not simple.\n");
16949072
RG
3621 return false;
3622 }
3623 }
ebfd146a 3624
b690cc0f
RG
3625 if (loop_vinfo)
3626 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3627 else
3628 vf = 1;
3629
3630 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3631 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 3632 case of SLP. */
437f4a00 3633 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
3634 ncopies = 1;
3635 else
3636 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3637
3638 gcc_assert (ncopies >= 1);
3639
9dc3f7de 3640 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
3641 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3642 || code == RROTATE_EXPR)
9dc3f7de 3643 return false;
ebfd146a 3644
ebfd146a 3645 /* Supportable by target? */
00f07b86
RH
3646
3647 vec_mode = TYPE_MODE (vectype);
3648 if (code == MULT_HIGHPART_EXPR)
ebfd146a 3649 {
00f07b86 3650 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 3651 icode = LAST_INSN_CODE;
00f07b86
RH
3652 else
3653 icode = CODE_FOR_nothing;
ebfd146a 3654 }
00f07b86
RH
3655 else
3656 {
3657 optab = optab_for_tree_code (code, vectype, optab_default);
3658 if (!optab)
5deb57cb 3659 {
73fbfcad 3660 if (dump_enabled_p ())
78c60e3d 3661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3662 "no optab.\n");
00f07b86 3663 return false;
5deb57cb 3664 }
00f07b86 3665 icode = (int) optab_handler (optab, vec_mode);
5deb57cb
JJ
3666 }
3667
ebfd146a
IR
3668 if (icode == CODE_FOR_nothing)
3669 {
73fbfcad 3670 if (dump_enabled_p ())
78c60e3d 3671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3672 "op not supported by target.\n");
ebfd146a
IR
3673 /* Check only during analysis. */
3674 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 3675 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 3676 return false;
73fbfcad 3677 if (dump_enabled_p ())
e645e942
TJ
3678 dump_printf_loc (MSG_NOTE, vect_location,
3679 "proceeding using word mode.\n");
383d9c83
IR
3680 }
3681
4a00c761 3682 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
3683 if (!VECTOR_MODE_P (vec_mode)
3684 && !vec_stmt
3685 && vf < vect_min_worthwhile_factor (code))
7d8930a0 3686 {
73fbfcad 3687 if (dump_enabled_p ())
78c60e3d 3688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3689 "not worthwhile without SIMD support.\n");
e34842c6 3690 return false;
7d8930a0 3691 }
ebfd146a 3692
ebfd146a
IR
3693 if (!vec_stmt) /* transformation not required. */
3694 {
4a00c761 3695 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 3696 if (dump_enabled_p ())
78c60e3d 3697 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3698 "=== vectorizable_operation ===\n");
c3e7ee41 3699 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
3700 return true;
3701 }
3702
3703 /** Transform. **/
3704
73fbfcad 3705 if (dump_enabled_p ())
78c60e3d 3706 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3707 "transform binary/unary operation.\n");
383d9c83 3708
ebfd146a 3709 /* Handle def. */
00f07b86 3710 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 3711
ebfd146a
IR
3712 /* In case the vectorization factor (VF) is bigger than the number
3713 of elements that we can fit in a vectype (nunits), we have to generate
3714 more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
3715 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3716 from one copy of the vector stmt to the next, in the field
3717 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3718 stages to find the correct vector defs to be used when vectorizing
3719 stmts that use the defs of the current stmt. The example below
3720 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3721 we need to create 4 vectorized stmts):
3722
3723 before vectorization:
3724 RELATED_STMT VEC_STMT
3725 S1: x = memref - -
3726 S2: z = x + 1 - -
3727
3728 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3729 there):
3730 RELATED_STMT VEC_STMT
3731 VS1_0: vx0 = memref0 VS1_1 -
3732 VS1_1: vx1 = memref1 VS1_2 -
3733 VS1_2: vx2 = memref2 VS1_3 -
3734 VS1_3: vx3 = memref3 - -
3735 S1: x = load - VS1_0
3736 S2: z = x + 1 - -
3737
3738 step2: vectorize stmt S2 (done here):
3739 To vectorize stmt S2 we first need to find the relevant vector
3740 def for the first operand 'x'. This is, as usual, obtained from
3741 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3742 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3743 relevant vector def 'vx0'. Having found 'vx0' we can generate
3744 the vector stmt VS2_0, and as usual, record it in the
3745 STMT_VINFO_VEC_STMT of stmt S2.
3746 When creating the second copy (VS2_1), we obtain the relevant vector
3747 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3748 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3749 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3750 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3751 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3752 chain of stmts and pointers:
3753 RELATED_STMT VEC_STMT
3754 VS1_0: vx0 = memref0 VS1_1 -
3755 VS1_1: vx1 = memref1 VS1_2 -
3756 VS1_2: vx2 = memref2 VS1_3 -
3757 VS1_3: vx3 = memref3 - -
3758 S1: x = load - VS1_0
3759 VS2_0: vz0 = vx0 + v1 VS2_1 -
3760 VS2_1: vz1 = vx1 + v1 VS2_2 -
3761 VS2_2: vz2 = vx2 + v1 VS2_3 -
3762 VS2_3: vz3 = vx3 + v1 - -
3763 S2: z = x + 1 - VS2_0 */
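
/* Editorial note (an addition, not original text): the loop below
   realizes the picture above.  Copy 0 fetches its operands with
   vect_get_vec_defs, later copies with vect_get_vec_defs_for_stmt_copy,
   and each new copy is linked to the previous one through
   STMT_VINFO_RELATED_STMT, producing exactly the VS2_* chain shown.  */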
ebfd146a
IR
3764
3765 prev_stmt_info = NULL;
3766 for (j = 0; j < ncopies; j++)
3767 {
3768 /* Handle uses. */
3769 if (j == 0)
4a00c761
JJ
3770 {
3771 if (op_type == binary_op || op_type == ternary_op)
3772 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3773 slp_node, -1);
3774 else
3775 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3776 slp_node, -1);
3777 if (op_type == ternary_op)
36ba4aae 3778 {
9771b263
DN
3779 vec_oprnds2.create (1);
3780 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3781 stmt,
3782 NULL));
36ba4aae 3783 }
4a00c761 3784 }
ebfd146a 3785 else
4a00c761
JJ
3786 {
3787 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3788 if (op_type == ternary_op)
3789 {
9771b263
DN
3790 tree vec_oprnd = vec_oprnds2.pop ();
3791 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3792 vec_oprnd));
4a00c761
JJ
3793 }
3794 }
3795
3796 /* Arguments are ready. Create the new vector stmt. */
9771b263 3797 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 3798 {
4a00c761 3799 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 3800 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 3801 vop2 = ((op_type == ternary_op)
9771b263 3802 ? vec_oprnds2[i] : NULL_TREE);
73804b12
RG
3803 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3804 vop0, vop1, vop2);
4a00c761
JJ
3805 new_temp = make_ssa_name (vec_dest, new_stmt);
3806 gimple_assign_set_lhs (new_stmt, new_temp);
3807 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3808 if (slp_node)
9771b263 3809 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
3810 }
3811
4a00c761
JJ
3812 if (slp_node)
3813 continue;
3814
3815 if (j == 0)
3816 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3817 else
3818 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3819 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
3820 }
3821
9771b263
DN
3822 vec_oprnds0.release ();
3823 vec_oprnds1.release ();
3824 vec_oprnds2.release ();
ebfd146a 3825
ebfd146a
IR
3826 return true;
3827}
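
/* Illustrative sketch (an editorial addition, not part of the original
   sources): a ternary case reaching vectorizable_operation is a
   three-operand GIMPLE assignment, e.g. a fused multiply-add where the
   target supports it:

     S: x = y * z + w;                  // op_type == ternary_op

   With a 4-lane vector type and VF == 8, ncopies == 2, so the j-loop
   above emits two vector statements, each combining vy, vz and vw.  */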
3828
c716e67f
XDL
3829/* A helper function to ensure data reference DR's base alignment
3830 for STMT_INFO. */
3831
3832static void
3833ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3834{
3835 if (!dr->aux)
3836 return;
3837
3838 if (((dataref_aux *)dr->aux)->base_misaligned)
3839 {
3840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3841 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3842
3843 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3844 DECL_USER_ALIGN (base_decl) = 1;
3845 ((dataref_aux *)dr->aux)->base_misaligned = false;
3846 }
3847}
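
/* Usage note (an editorial addition, not original text): a data
   reference whose analysis marked it base_misaligned, for instance an
   access to a global array declared with a default alignment smaller
   than the vector type's, gets DECL_ALIGN of its base declaration
   raised to the vector alignment here, so the transforms below may
   emit aligned vector accesses.  vectorizable_store and
   vectorizable_load both call this before transforming.  */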
3848
ebfd146a
IR
3849
3850/* Function vectorizable_store.
3851
b8698a0f
L
3852 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3853 can be vectorized.
3854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3855 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3857
3858static bool
3859vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 3860 slp_tree slp_node)
ebfd146a
IR
3861{
3862 tree scalar_dest;
3863 tree data_ref;
3864 tree op;
3865 tree vec_oprnd = NULL_TREE;
3866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3867 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3868 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3869 tree elem_type;
ebfd146a 3870 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3871 struct loop *loop = NULL;
ebfd146a
IR
3872 enum machine_mode vec_mode;
3873 tree dummy;
3874 enum dr_alignment_support alignment_support_scheme;
3875 tree def;
3876 gimple def_stmt;
3877 enum vect_def_type dt;
3878 stmt_vec_info prev_stmt_info = NULL;
3879 tree dataref_ptr = NULL_TREE;
74bf76ed 3880 tree dataref_offset = NULL_TREE;
fef4d2b3 3881 gimple ptr_incr = NULL;
ebfd146a
IR
3882 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3883 int ncopies;
3884 int j;
3885 gimple next_stmt, first_stmt = NULL;
0d0293ac 3886 bool grouped_store = false;
272c6793 3887 bool store_lanes_p = false;
ebfd146a 3888 unsigned int group_size, i;
6e1aa848
DN
3889 vec<tree> dr_chain = vNULL;
3890 vec<tree> oprnds = vNULL;
3891 vec<tree> result_chain = vNULL;
ebfd146a 3892 bool inv_p;
6e1aa848 3893 vec<tree> vec_oprnds = vNULL;
ebfd146a 3894 bool slp = (slp_node != NULL);
ebfd146a 3895 unsigned int vec_num;
a70d6342 3896 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3897 tree aggr_type;
a70d6342
IR
3898
3899 if (loop_vinfo)
3900 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
3901
3902 /* Multiple types in SLP are handled by creating the appropriate number of
3903 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3904 case of SLP. */
437f4a00 3905 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
3906 ncopies = 1;
3907 else
3908 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3909
3910 gcc_assert (ncopies >= 1);
3911
3912 /* FORNOW. This restriction should be relaxed. */
a70d6342 3913 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 3914 {
73fbfcad 3915 if (dump_enabled_p ())
78c60e3d 3916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3917 "multiple types in nested loop.\n");
ebfd146a
IR
3918 return false;
3919 }
3920
a70d6342 3921 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3922 return false;
3923
8644a673 3924 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3925 return false;
3926
3927 /* Is vectorizable store? */
3928
3929 if (!is_gimple_assign (stmt))
3930 return false;
3931
3932 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
3933 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3934 && is_pattern_stmt_p (stmt_info))
3935 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 3936 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 3937 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 3938 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
3939 && TREE_CODE (scalar_dest) != COMPONENT_REF
3940 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
3941 && TREE_CODE (scalar_dest) != REALPART_EXPR
3942 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
3943 return false;
3944
3945 gcc_assert (gimple_assign_single_p (stmt));
3946 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
3947 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3948 &def, &dt))
ebfd146a 3949 {
73fbfcad 3950 if (dump_enabled_p ())
78c60e3d 3951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3952 "use not simple.\n");
ebfd146a
IR
3953 return false;
3954 }
3955
272c6793 3956 elem_type = TREE_TYPE (vectype);
ebfd146a 3957 vec_mode = TYPE_MODE (vectype);
7b7b1813 3958
ebfd146a
IR
3959 /* FORNOW. In some cases we can vectorize even if the data type is not
3960 supported (e.g., array initialization with 0). */
947131ba 3961 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
3962 return false;
3963
3964 if (!STMT_VINFO_DATA_REF (stmt_info))
3965 return false;
3966
a7ce6ec3
RG
3967 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3968 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3969 size_zero_node) < 0)
a1e53f3f 3970 {
73fbfcad 3971 if (dump_enabled_p ())
78c60e3d 3972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3973 "negative step for store.\n");
a1e53f3f
L
3974 return false;
3975 }
3976
0d0293ac 3977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 3978 {
0d0293ac 3979 grouped_store = true;
e14c1050 3980 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
3981 if (!slp && !PURE_SLP_STMT (stmt_info))
3982 {
e14c1050 3983 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
3984 if (vect_store_lanes_supported (vectype, group_size))
3985 store_lanes_p = true;
0d0293ac 3986 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
3987 return false;
3988 }
b8698a0f 3989
ebfd146a
IR
3990 if (first_stmt == stmt)
3991 {
3992 /* STMT is the leader of the group. Check the operands of all the
3993 stmts of the group. */
e14c1050 3994 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
3995 while (next_stmt)
3996 {
3997 gcc_assert (gimple_assign_single_p (next_stmt));
3998 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
3999 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
4000 &def_stmt, &def, &dt))
ebfd146a 4001 {
73fbfcad 4002 if (dump_enabled_p ())
78c60e3d 4003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4004 "use not simple.\n");
ebfd146a
IR
4005 return false;
4006 }
e14c1050 4007 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
4008 }
4009 }
4010 }
4011
4012 if (!vec_stmt) /* transformation not required. */
4013 {
4014 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
4015 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4016 NULL, NULL, NULL);
ebfd146a
IR
4017 return true;
4018 }
4019
4020 /** Transform. **/
4021
c716e67f
XDL
4022 ensure_base_align (stmt_info, dr);
4023
0d0293ac 4024 if (grouped_store)
ebfd146a
IR
4025 {
4026 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4027 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 4028
e14c1050 4029 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
4030
4031 /* FORNOW */
a70d6342 4032 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
4033
4034 /* We vectorize all the stmts of the interleaving group when we
4035 reach the last stmt in the group. */
e14c1050
IR
4036 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4037 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
4038 && !slp)
4039 {
4040 *vec_stmt = NULL;
4041 return true;
4042 }
4043
4044 if (slp)
4b5caab7 4045 {
0d0293ac 4046 grouped_store = false;
4b5caab7
IR
4047 /* VEC_NUM is the number of vect stmts to be created for this
4048 group. */
4049 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 4050 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 4051 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 4052 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 4053 }
ebfd146a 4054 else
4b5caab7
IR
4055 /* VEC_NUM is the number of vect stmts to be created for this
4056 group. */
ebfd146a
IR
4057 vec_num = group_size;
4058 }
b8698a0f 4059 else
ebfd146a
IR
4060 {
4061 first_stmt = stmt;
4062 first_dr = dr;
4063 group_size = vec_num = 1;
ebfd146a 4064 }
b8698a0f 4065
73fbfcad 4066 if (dump_enabled_p ())
78c60e3d 4067 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4068 "transform store. ncopies = %d\n", ncopies);
ebfd146a 4069
9771b263
DN
4070 dr_chain.create (group_size);
4071 oprnds.create (group_size);
ebfd146a 4072
720f5239 4073 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4074 gcc_assert (alignment_support_scheme);
272c6793
RS
4075 /* Targets with store-lane instructions must not require explicit
4076 realignment. */
4077 gcc_assert (!store_lanes_p
4078 || alignment_support_scheme == dr_aligned
4079 || alignment_support_scheme == dr_unaligned_supported);
4080
4081 if (store_lanes_p)
4082 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4083 else
4084 aggr_type = vectype;
ebfd146a
IR
4085
4086 /* In case the vectorization factor (VF) is bigger than the number
4087 of elements that we can fit in a vectype (nunits), we have to generate
4088 more than one vector stmt - i.e., we need to "unroll" the
b8698a0f 4089 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
4090 vect_get_vec_def_for_copy_stmt. */
4091
0d0293ac 4092 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4093
4094 S1: &base + 2 = x2
4095 S2: &base = x0
4096 S3: &base + 1 = x1
4097 S4: &base + 3 = x3
4098
4099 We create vectorized stores starting from the base address (the access
4100 of the first stmt in the chain, S2 in the above example) when the last
4101 store stmt of the chain (S4) is reached:
4102
4103 VS1: &base = vx2
4104 VS2: &base + vec_size*1 = vx0
4105 VS3: &base + vec_size*2 = vx1
4106 VS4: &base + vec_size*3 = vx3
4107
4108 Then permutation statements are generated:
4109
3fcc1b55
JJ
4110 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4111 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 4112 ...
b8698a0f 4113
ebfd146a
IR
4114 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4115 (the order of the data-refs in the output of vect_permute_store_chain
4116 corresponds to the order of scalar stmts in the interleaving chain - see
4117 the documentation of vect_permute_store_chain()).
4118
4119 In case of both multiple types and interleaving, above vector stores and
ff802fa1 4120 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 4121 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 4122 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
4123 */
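
/* Editorial note (an addition, not original text): in VS5 above the
   mask {0, 8, 1, 9, 2, 10, 3, 11} indexes the concatenation of the two
   input vectors (elements 0-7 from vx0, 8-15 from vx3), so the
   permuted outputs place the group's values in the order the scalar
   stores write them to consecutive memory locations.  */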
4124
4125 prev_stmt_info = NULL;
4126 for (j = 0; j < ncopies; j++)
4127 {
4128 gimple new_stmt;
ebfd146a
IR
4129
4130 if (j == 0)
4131 {
4132 if (slp)
4133 {
4134 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
4135 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4136 NULL, slp_node, -1);
ebfd146a 4137
9771b263 4138 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
4139 }
4140 else
4141 {
b8698a0f
L
4142 /* For interleaved stores we collect vectorized defs for all the
4143 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4144 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
4145 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4146
0d0293ac 4147 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 4148 OPRNDS are of size 1. */
b8698a0f 4149 next_stmt = first_stmt;
ebfd146a
IR
4150 for (i = 0; i < group_size; i++)
4151 {
b8698a0f
L
4152 /* Since gaps are not supported for interleaved stores,
4153 GROUP_SIZE is the exact number of stmts in the chain.
4154 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4155 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
4156 iteration of the loop will be executed. */
4157 gcc_assert (next_stmt
4158 && gimple_assign_single_p (next_stmt));
4159 op = gimple_assign_rhs1 (next_stmt);
4160
b8698a0f 4161 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 4162 NULL);
9771b263
DN
4163 dr_chain.quick_push (vec_oprnd);
4164 oprnds.quick_push (vec_oprnd);
e14c1050 4165 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
4166 }
4167 }
4168
4169 /* We should have caught mismatched types earlier. */
4170 gcc_assert (useless_type_conversion_p (vectype,
4171 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
4172 bool simd_lane_access_p
4173 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4174 if (simd_lane_access_p
4175 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4176 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4177 && integer_zerop (DR_OFFSET (first_dr))
4178 && integer_zerop (DR_INIT (first_dr))
4179 && alias_sets_conflict_p (get_alias_set (aggr_type),
4180 get_alias_set (DR_REF (first_dr))))
4181 {
4182 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4183 dataref_offset = build_int_cst (reference_alias_ptr_type
4184 (DR_REF (first_dr)), 0);
8928eff3 4185 inv_p = false;
74bf76ed
JJ
4186 }
4187 else
4188 dataref_ptr
4189 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4190 simd_lane_access_p ? loop : NULL,
4191 NULL_TREE, &dummy, gsi, &ptr_incr,
4192 simd_lane_access_p, &inv_p);
a70d6342 4193 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 4194 }
b8698a0f 4195 else
ebfd146a 4196 {
b8698a0f
L
4197 /* For interleaved stores we created vectorized defs for all the
4198 defs stored in OPRNDS in the previous iteration (previous copy).
4199 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
4200 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4201 next copy.
0d0293ac 4202 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
4203 OPRNDS are of size 1. */
4204 for (i = 0; i < group_size; i++)
4205 {
9771b263 4206 op = oprnds[i];
24ee1384
IR
4207 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4208 &def, &dt);
b8698a0f 4209 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
4210 dr_chain[i] = vec_oprnd;
4211 oprnds[i] = vec_oprnd;
ebfd146a 4212 }
74bf76ed
JJ
4213 if (dataref_offset)
4214 dataref_offset
4215 = int_const_binop (PLUS_EXPR, dataref_offset,
4216 TYPE_SIZE_UNIT (aggr_type));
4217 else
4218 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4219 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
4220 }
4221
272c6793 4222 if (store_lanes_p)
ebfd146a 4223 {
272c6793 4224 tree vec_array;
267d3070 4225
272c6793
RS
4226 /* Combine all the vectors into an array. */
4227 vec_array = create_vector_array (vectype, vec_num);
4228 for (i = 0; i < vec_num; i++)
c2d7ab2a 4229 {
9771b263 4230 vec_oprnd = dr_chain[i];
272c6793 4231 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 4232 }
b8698a0f 4233
272c6793
RS
4234 /* Emit:
4235 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4236 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4237 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4238 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 4239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4240 }
4241 else
4242 {
4243 new_stmt = NULL;
0d0293ac 4244 if (grouped_store)
272c6793 4245 {
b6b9227d
JJ
4246 if (j == 0)
4247 result_chain.create (group_size);
272c6793
RS
4248 /* Permute. */
4249 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4250 &result_chain);
4251 }
c2d7ab2a 4252
272c6793
RS
4253 next_stmt = first_stmt;
4254 for (i = 0; i < vec_num; i++)
4255 {
644ffefd 4256 unsigned align, misalign;
272c6793
RS
4257
4258 if (i > 0)
4259 /* Bump the vector pointer. */
4260 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4261 stmt, NULL_TREE);
4262
4263 if (slp)
9771b263 4264 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
4265 else if (grouped_store)
4266 /* For grouped stores vectorized defs are interleaved in
272c6793 4267 vect_permute_store_chain(). */
9771b263 4268 vec_oprnd = result_chain[i];
272c6793
RS
4269
4270 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
74bf76ed
JJ
4271 dataref_offset
4272 ? dataref_offset
4273 : build_int_cst (reference_alias_ptr_type
4274 (DR_REF (first_dr)), 0));
644ffefd 4275 align = TYPE_ALIGN_UNIT (vectype);
272c6793 4276 if (aligned_access_p (first_dr))
644ffefd 4277 misalign = 0;
272c6793
RS
4278 else if (DR_MISALIGNMENT (first_dr) == -1)
4279 {
4280 TREE_TYPE (data_ref)
4281 = build_aligned_type (TREE_TYPE (data_ref),
4282 TYPE_ALIGN (elem_type));
644ffefd
MJ
4283 align = TYPE_ALIGN_UNIT (elem_type);
4284 misalign = 0;
272c6793
RS
4285 }
4286 else
4287 {
4288 TREE_TYPE (data_ref)
4289 = build_aligned_type (TREE_TYPE (data_ref),
4290 TYPE_ALIGN (elem_type));
644ffefd 4291 misalign = DR_MISALIGNMENT (first_dr);
272c6793 4292 }
74bf76ed
JJ
4293 if (dataref_offset == NULL_TREE)
4294 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4295 misalign);
c2d7ab2a 4296
272c6793
RS
4297 /* Arguments are ready. Create the new vector stmt. */
4298 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4299 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4300
4301 if (slp)
4302 continue;
4303
e14c1050 4304 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
4305 if (!next_stmt)
4306 break;
4307 }
ebfd146a 4308 }
1da0876c
RS
4309 if (!slp)
4310 {
4311 if (j == 0)
4312 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4313 else
4314 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4315 prev_stmt_info = vinfo_for_stmt (new_stmt);
4316 }
ebfd146a
IR
4317 }
4318
9771b263
DN
4319 dr_chain.release ();
4320 oprnds.release ();
4321 result_chain.release ();
4322 vec_oprnds.release ();
ebfd146a
IR
4323
4324 return true;
4325}
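
/* Illustrative sketch (an editorial addition, not part of the original
   sources): the simplest case handled above is a unit-stride,
   non-grouped store such as

     for (i = 0; i < n; i++)
       a[i] = x;

   which becomes one vector store per copy,

     MEM_REF[dataref_ptr] = vx;

   with the pointer advanced by the vector size between copies via
   bump_vector_ptr.  */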
4326
aec7ae7d
JJ
4327/* Given a vector type VECTYPE and a permutation SEL, return
4328 the VECTOR_CST mask that implements the permutation of the
4329 vector elements. If that is impossible to do, return NULL. */
a1e53f3f 4330
3fcc1b55
JJ
4331tree
4332vect_gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 4333{
d2a12ae7 4334 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 4335 int i, nunits;
a1e53f3f 4336
22e4dee7 4337 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7
RH
4338
4339 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
a1e53f3f
L
4340 return NULL;
4341
96f9265a
RG
4342 mask_elt_type = lang_hooks.types.type_for_mode
4343 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 4344 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 4345
d2a12ae7 4346 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 4347 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
4348 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4349 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 4350
2635892a 4351 return mask_vec;
a1e53f3f
L
4352}
4353
aec7ae7d
JJ
4354/* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4355 a reversal of the vector elements. If that is impossible to do,
4356 return NULL. */
4357
4358static tree
4359perm_mask_for_reverse (tree vectype)
4360{
4361 int i, nunits;
4362 unsigned char *sel;
4363
4364 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4365 sel = XALLOCAVEC (unsigned char, nunits);
4366
4367 for (i = 0; i < nunits; ++i)
4368 sel[i] = nunits - 1 - i;
4369
3fcc1b55 4370 return vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4371}
4372
4373/* Given vector variables X and Y that were generated for the scalar
4374 STMT, generate instructions to permute the vector elements of X and Y
4375 using the permutation mask MASK_VEC, insert them at *GSI and return
4376 the permuted vector variable. */
a1e53f3f
L
4377
4378static tree
aec7ae7d
JJ
4379permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4380 gimple_stmt_iterator *gsi)
a1e53f3f
L
4381{
4382 tree vectype = TREE_TYPE (x);
aec7ae7d 4383 tree perm_dest, data_ref;
a1e53f3f
L
4384 gimple perm_stmt;
4385
a1e53f3f 4386 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
aec7ae7d 4387 data_ref = make_ssa_name (perm_dest, NULL);
a1e53f3f
L
4388
4389 /* Generate the permute statement. */
73804b12
RG
4390 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4391 x, y, mask_vec);
a1e53f3f
L
4392 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4393
4394 return data_ref;
4395}
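
/* Illustrative sketch (an editorial addition, not part of the original
   sources): reversing a V4SI vector x = {a, b, c, d} uses the mask
   sel = {3, 2, 1, 0} built by perm_mask_for_reverse above, and a
   single statement emitted by permute_vec_elements with Y == X:

     x' = VEC_PERM_EXPR <x, x, {3, 2, 1, 0}>;   // yields {d, c, b, a}
   */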
4396
ebfd146a
IR
4397/* vectorizable_load.
4398
b8698a0f
L
4399 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4400 can be vectorized.
4401 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4402 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4403 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4404
4405static bool
4406vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 4407 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
4408{
4409 tree scalar_dest;
4410 tree vec_dest = NULL;
4411 tree data_ref = NULL;
4412 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4413 stmt_vec_info prev_stmt_info;
ebfd146a 4414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4415 struct loop *loop = NULL;
ebfd146a 4416 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4417 bool nested_in_vect_loop = false;
c716e67f 4418 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 4419 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4420 tree elem_type;
ebfd146a 4421 tree new_temp;
947131ba 4422 enum machine_mode mode;
ebfd146a
IR
4423 gimple new_stmt = NULL;
4424 tree dummy;
4425 enum dr_alignment_support alignment_support_scheme;
4426 tree dataref_ptr = NULL_TREE;
74bf76ed 4427 tree dataref_offset = NULL_TREE;
fef4d2b3 4428 gimple ptr_incr = NULL;
ebfd146a
IR
4429 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4430 int ncopies;
a64b9c26 4431 int i, j, group_size, group_gap;
ebfd146a
IR
4432 tree msq = NULL_TREE, lsq;
4433 tree offset = NULL_TREE;
4434 tree realignment_token = NULL_TREE;
4435 gimple phi = NULL;
6e1aa848 4436 vec<tree> dr_chain = vNULL;
0d0293ac 4437 bool grouped_load = false;
272c6793 4438 bool load_lanes_p = false;
ebfd146a 4439 gimple first_stmt;
ebfd146a 4440 bool inv_p;
319e6439 4441 bool negative = false;
ebfd146a
IR
4442 bool compute_in_loop = false;
4443 struct loop *at_loop;
4444 int vec_num;
4445 bool slp = (slp_node != NULL);
4446 bool slp_perm = false;
4447 enum tree_code code;
a70d6342
IR
4448 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4449 int vf;
272c6793 4450 tree aggr_type;
aec7ae7d
JJ
4451 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4452 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4453 int gather_scale = 1;
4454 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
4455
4456 if (loop_vinfo)
4457 {
4458 loop = LOOP_VINFO_LOOP (loop_vinfo);
4459 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4460 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4461 }
4462 else
3533e503 4463 vf = 1;
ebfd146a
IR
4464
4465 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4466 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4467 case of SLP. */
437f4a00 4468 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4469 ncopies = 1;
4470 else
4471 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4472
4473 gcc_assert (ncopies >= 1);
4474
4475 /* FORNOW. This restriction should be relaxed. */
4476 if (nested_in_vect_loop && ncopies > 1)
4477 {
73fbfcad 4478 if (dump_enabled_p ())
78c60e3d 4479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4480 "multiple types in nested loop.\n");
ebfd146a
IR
4481 return false;
4482 }
4483
a70d6342 4484 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4485 return false;
4486
8644a673 4487 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4488 return false;
4489
4490 /* Is vectorizable load? */
4491 if (!is_gimple_assign (stmt))
4492 return false;
4493
4494 scalar_dest = gimple_assign_lhs (stmt);
4495 if (TREE_CODE (scalar_dest) != SSA_NAME)
4496 return false;
4497
4498 code = gimple_assign_rhs_code (stmt);
4499 if (code != ARRAY_REF
38000232 4500 && code != BIT_FIELD_REF
ebfd146a 4501 && code != INDIRECT_REF
e9dbe7bb
IR
4502 && code != COMPONENT_REF
4503 && code != IMAGPART_EXPR
70f34814 4504 && code != REALPART_EXPR
42373e0b
RG
4505 && code != MEM_REF
4506 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
4507 return false;
4508
4509 if (!STMT_VINFO_DATA_REF (stmt_info))
4510 return false;
4511
7b7b1813 4512 elem_type = TREE_TYPE (vectype);
947131ba 4513 mode = TYPE_MODE (vectype);
ebfd146a
IR
4514
4515 /* FORNOW. In some cases we can vectorize even if the data type is not
4516 supported (e.g., data copies). */
947131ba 4517 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 4518 {
73fbfcad 4519 if (dump_enabled_p ())
78c60e3d 4520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4521 "Aligned load, but unsupported type.\n");
ebfd146a
IR
4522 return false;
4523 }
4524
ebfd146a 4525 /* Check if the load is a part of an interleaving chain. */
0d0293ac 4526 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 4527 {
0d0293ac 4528 grouped_load = true;
ebfd146a 4529 /* FORNOW */
aec7ae7d 4530 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 4531
e14c1050 4532 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
4533 if (!slp && !PURE_SLP_STMT (stmt_info))
4534 {
e14c1050 4535 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
4536 if (vect_load_lanes_supported (vectype, group_size))
4537 load_lanes_p = true;
0d0293ac 4538 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
4539 return false;
4540 }
ebfd146a
IR
4541 }
4542
a1e53f3f 4543
aec7ae7d
JJ
4544 if (STMT_VINFO_GATHER_P (stmt_info))
4545 {
4546 gimple def_stmt;
4547 tree def;
4548 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4549 &gather_off, &gather_scale);
4550 gcc_assert (gather_decl);
24ee1384 4551 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
4552 &def_stmt, &def, &gather_dt,
4553 &gather_off_vectype))
4554 {
73fbfcad 4555 if (dump_enabled_p ())
78c60e3d 4556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4557 "gather index use not simple.\n");
aec7ae7d
JJ
4558 return false;
4559 }
4560 }
7d75abc8 4561 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
14ac6aa2 4562 ;
319e6439
RG
4563 else
4564 {
4565 negative = tree_int_cst_compare (nested_in_vect_loop
4566 ? STMT_VINFO_DR_STEP (stmt_info)
4567 : DR_STEP (dr),
4568 size_zero_node) < 0;
4569 if (negative && ncopies > 1)
4570 {
73fbfcad 4571 if (dump_enabled_p ())
78c60e3d 4572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4573 "multiple types with negative step.\n");
319e6439
RG
4574 return false;
4575 }
4576
4577 if (negative)
4578 {
08940f33
RB
4579 if (grouped_load)
4580 {
4581 if (dump_enabled_p ())
4582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4583 "negative step for group load not supported"
4584 "\n");
08940f33
RB
4585 return false;
4586 }
319e6439
RG
4587 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4588 if (alignment_support_scheme != dr_aligned
4589 && alignment_support_scheme != dr_unaligned_supported)
4590 {
73fbfcad 4591 if (dump_enabled_p ())
78c60e3d 4592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4593 "negative step but alignment required.\n");
319e6439
RG
4594 return false;
4595 }
4596 if (!perm_mask_for_reverse (vectype))
4597 {
73fbfcad 4598 if (dump_enabled_p ())
78c60e3d 4599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4600 "negative step and reversing not supported."
4601 "\n");
319e6439
RG
4602 return false;
4603 }
4604 }
7d75abc8 4605 }
aec7ae7d 4606
ebfd146a
IR
4607 if (!vec_stmt) /* transformation not required. */
4608 {
4609 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 4610 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
ebfd146a
IR
4611 return true;
4612 }
4613
73fbfcad 4614 if (dump_enabled_p ())
78c60e3d 4615 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4616 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
4617
4618 /** Transform. **/
4619
c716e67f
XDL
4620 ensure_base_align (stmt_info, dr);
4621
aec7ae7d
JJ
4622 if (STMT_VINFO_GATHER_P (stmt_info))
4623 {
4624 tree vec_oprnd0 = NULL_TREE, op;
4625 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4626 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4627 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4628 edge pe = loop_preheader_edge (loop);
4629 gimple_seq seq;
4630 basic_block new_bb;
4631 enum { NARROW, NONE, WIDEN } modifier;
4632 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4633
4634 if (nunits == gather_off_nunits)
4635 modifier = NONE;
4636 else if (nunits == gather_off_nunits / 2)
4637 {
4638 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4639 modifier = WIDEN;
4640
4641 for (i = 0; i < gather_off_nunits; ++i)
4642 sel[i] = i | nunits;
4643
3fcc1b55 4644 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
aec7ae7d
JJ
4645 gcc_assert (perm_mask != NULL_TREE);
4646 }
4647 else if (nunits == gather_off_nunits * 2)
4648 {
4649 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4650 modifier = NARROW;
4651
4652 for (i = 0; i < nunits; ++i)
4653 sel[i] = i < gather_off_nunits
4654 ? i : i + nunits - gather_off_nunits;
4655
3fcc1b55 4656 perm_mask = vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4657 gcc_assert (perm_mask != NULL_TREE);
4658 ncopies *= 2;
4659 }
4660 else
4661 gcc_unreachable ();
4662
4663 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4664 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4665 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4666 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4667 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4668 scaletype = TREE_VALUE (arglist);
4669 gcc_checking_assert (types_compatible_p (srctype, rettype)
4670 && types_compatible_p (srctype, masktype));
4671
4672 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4673
4674 ptr = fold_convert (ptrtype, gather_base);
4675 if (!is_gimple_min_invariant (ptr))
4676 {
4677 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4678 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4679 gcc_assert (!new_bb);
4680 }
4681
4682 /* Currently we support only unconditional gather loads,
4683 so mask should be all ones. */
4684 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4685 mask = build_int_cst (TREE_TYPE (masktype), -1);
4686 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4687 {
4688 REAL_VALUE_TYPE r;
4689 long tmp[6];
4690 for (j = 0; j < 6; ++j)
4691 tmp[j] = -1;
4692 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4693 mask = build_real (TREE_TYPE (masktype), r);
4694 }
4695 else
4696 gcc_unreachable ();
4697 mask = build_vector_from_val (masktype, mask);
4698 mask = vect_init_vector (stmt, mask, masktype, NULL);
4699
4700 scale = build_int_cst (scaletype, gather_scale);
4701
4702 prev_stmt_info = NULL;
4703 for (j = 0; j < ncopies; ++j)
4704 {
4705 if (modifier == WIDEN && (j & 1))
4706 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4707 perm_mask, stmt, gsi);
4708 else if (j == 0)
4709 op = vec_oprnd0
4710 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4711 else
4712 op = vec_oprnd0
4713 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4714
4715 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4716 {
4717 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4718 == TYPE_VECTOR_SUBPARTS (idxtype));
4719 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
4720 var = make_ssa_name (var, NULL);
4721 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4722 new_stmt
4723 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4724 op, NULL_TREE);
4725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4726 op = var;
4727 }
4728
4729 new_stmt
4730 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4731
4732 if (!useless_type_conversion_p (vectype, rettype))
4733 {
4734 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4735 == TYPE_VECTOR_SUBPARTS (rettype));
4736 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
4737 op = make_ssa_name (var, new_stmt);
4738 gimple_call_set_lhs (new_stmt, op);
4739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4740 var = make_ssa_name (vec_dest, NULL);
4741 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4742 new_stmt
4743 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4744 NULL_TREE);
4745 }
4746 else
4747 {
4748 var = make_ssa_name (vec_dest, new_stmt);
4749 gimple_call_set_lhs (new_stmt, var);
4750 }
4751
4752 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4753
4754 if (modifier == NARROW)
4755 {
4756 if ((j & 1) == 0)
4757 {
4758 prev_res = var;
4759 continue;
4760 }
4761 var = permute_vec_elements (prev_res, var,
4762 perm_mask, stmt, gsi);
4763 new_stmt = SSA_NAME_DEF_STMT (var);
4764 }
4765
4766 if (prev_stmt_info == NULL)
4767 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4768 else
4769 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4770 prev_stmt_info = vinfo_for_stmt (new_stmt);
4771 }
4772 return true;
4773 }
7d75abc8
MM
4774 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4775 {
4776 gimple_stmt_iterator incr_gsi;
4777 bool insert_after;
4778 gimple incr;
4779 tree offvar;
7d75abc8
MM
4780 tree ivstep;
4781 tree running_off;
9771b263 4782 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 4783 gimple_seq stmts = NULL;
14ac6aa2
RB
4784 tree stride_base, stride_step, alias_off;
4785
4786 gcc_assert (!nested_in_vect_loop);
7d75abc8 4787
14ac6aa2
RB
4788 stride_base
4789 = fold_build_pointer_plus
4790 (unshare_expr (DR_BASE_ADDRESS (dr)),
4791 size_binop (PLUS_EXPR,
4792 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4793 convert_to_ptrofftype (DR_INIT(dr))));
4794 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
4795
4796 /* For a load with loop-invariant (but other than power-of-2)
4797 stride (i.e. not a grouped access) like so:
4798
4799 for (i = 0; i < n; i += stride)
4800 ... = array[i];
4801
4802 we generate a new induction variable and new accesses to
4803 form a new vector (or vectors, depending on ncopies):
4804
4805 for (j = 0; ; j += VF*stride)
4806 tmp1 = array[j];
4807 tmp2 = array[j + stride];
4808 ...
4809 vectemp = {tmp1, tmp2, ...}
4810 */
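
/* Editorial note (an addition, not original text): with nunits == 4
   and a scalar stride of 3 elements, one copy loads array[j],
   array[j+3], array[j+6] and array[j+9], gathers them into a single
   vector through a CONSTRUCTOR, and the induction variable advances
   by VF times the step (IVSTEP below) for the next vector.  */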
4811
4812 ivstep = stride_step;
4813 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4814 build_int_cst (TREE_TYPE (ivstep), vf));
4815
4816 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4817
4818 create_iv (stride_base, ivstep, NULL,
4819 loop, &incr_gsi, insert_after,
4820 &offvar, NULL);
4821 incr = gsi_stmt (incr_gsi);
4822 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4823
4824 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4825 if (stmts)
4826 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4827
4828 prev_stmt_info = NULL;
4829 running_off = offvar;
14ac6aa2 4830 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
7d75abc8
MM
4831 for (j = 0; j < ncopies; j++)
4832 {
4833 tree vec_inv;
4834
9771b263 4835 vec_alloc (v, nunits);
7d75abc8
MM
4836 for (i = 0; i < nunits; i++)
4837 {
4838 tree newref, newoff;
4839 gimple incr;
14ac6aa2
RB
4840 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4841 running_off, alias_off);
7d75abc8
MM
4842
4843 newref = force_gimple_operand_gsi (gsi, newref, true,
4844 NULL_TREE, true,
4845 GSI_SAME_STMT);
4846 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 4847 newoff = copy_ssa_name (running_off, NULL);
14ac6aa2
RB
4848 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4849 running_off, stride_step);
7d75abc8
MM
4850 vect_finish_stmt_generation (stmt, incr, gsi);
4851
4852 running_off = newoff;
4853 }
4854
4855 vec_inv = build_constructor (vectype, v);
4856 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4857 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7d75abc8
MM
4858
4859 if (j == 0)
4860 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4861 else
4862 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4863 prev_stmt_info = vinfo_for_stmt (new_stmt);
4864 }
4865 return true;
4866 }
aec7ae7d 4867
0d0293ac 4868 if (grouped_load)
ebfd146a 4869 {
e14c1050 4870 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4871 if (slp
01d8bf07 4872 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
4873 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4874 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4875
ebfd146a 4876 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
4877 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4878 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4879 ??? But we can only do so if there is exactly one
4880 as we have no way to get at the rest. Leave the CSE
4881 opportunity alone.
4882 ??? With the group load eventually participating
4883 in multiple different permutations (having multiple
 4884 slp nodes which refer to the same group) the CSE
 4885 would even produce wrong code. See PR56270. */
4886 && !slp)
ebfd146a
IR
4887 {
4888 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4889 return true;
4890 }
4891 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4892 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
4893
4894 /* VEC_NUM is the number of vect stmts to be created for this group. */
4895 if (slp)
4896 {
0d0293ac 4897 grouped_load = false;
ebfd146a 4898 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 4899 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 4900 slp_perm = true;
a64b9c26 4901 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 4902 }
ebfd146a 4903 else
a64b9c26
RB
4904 {
4905 vec_num = group_size;
4906 group_gap = 0;
4907 }
ebfd146a
IR
4908 }
4909 else
4910 {
4911 first_stmt = stmt;
4912 first_dr = dr;
4913 group_size = vec_num = 1;
a64b9c26 4914 group_gap = 0;
ebfd146a
IR
4915 }
4916
720f5239 4917 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4918 gcc_assert (alignment_support_scheme);
272c6793
RS
4919 /* Targets with load-lane instructions must not require explicit
4920 realignment. */
4921 gcc_assert (!load_lanes_p
4922 || alignment_support_scheme == dr_aligned
4923 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
4924
4925 /* In case the vectorization factor (VF) is bigger than the number
4926 of elements that we can fit in a vectype (nunits), we have to generate
 4927 more than one vector stmt, i.e., we need to "unroll" the
ff802fa1 4928 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4929 from one copy of the vector stmt to the next, in the field
ff802fa1 4930 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4931 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
4932 stmts that use the defs of the current stmt. The example below
4933 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4934 need to create 4 vectorized stmts):
ebfd146a
IR
4935
4936 before vectorization:
4937 RELATED_STMT VEC_STMT
4938 S1: x = memref - -
4939 S2: z = x + 1 - -
4940
4941 step 1: vectorize stmt S1:
4942 We first create the vector stmt VS1_0, and, as usual, record a
4943 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4944 Next, we create the vector stmt VS1_1, and record a pointer to
4945 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4946 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
4947 stmts and pointers:
4948 RELATED_STMT VEC_STMT
4949 VS1_0: vx0 = memref0 VS1_1 -
4950 VS1_1: vx1 = memref1 VS1_2 -
4951 VS1_2: vx2 = memref2 VS1_3 -
4952 VS1_3: vx3 = memref3 - -
4953 S1: x = load - VS1_0
4954 S2: z = x + 1 - -
4955
b8698a0f
L
 4956 See the documentation of vect_get_vec_def_for_stmt_copy for how the
 4957 information recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
4958 stmt S2. */
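 /* Illustrative sketch only (not taken from the GCC sources): continuing
 the example, step 2 vectorizes S2 by following the RELATED_STMT chain
 of S1's vector stmts to obtain the operand for each copy:

 RELATED_STMT VEC_STMT
 VS2_0: vz0 = vx0 + v1 VS2_1 -
 VS2_1: vz1 = vx1 + v1 VS2_2 -
 VS2_2: vz2 = vx2 + v1 VS2_3 -
 VS2_3: vz3 = vx3 + v1 - -
 S2: z = x + 1 - VS2_0

 vect_get_vec_def_for_stmt_copy returns vx1 after vx0, vx2 after vx1,
 and so on; that lookup is exactly what the RELATED_STMT field makes
 possible. */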
4959
0d0293ac 4960 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4961
4962 S1: x2 = &base + 2
4963 S2: x0 = &base
4964 S3: x1 = &base + 1
4965 S4: x3 = &base + 3
4966
b8698a0f 4967 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4968 starting from the access of the first stmt of the chain:
4969
4970 VS1: vx0 = &base
4971 VS2: vx1 = &base + vec_size*1
 4972 VS3: vx2 = &base + vec_size*2
 4973 VS4: vx3 = &base + vec_size*3
4974
4975 Then permutation statements are generated:
4976
e2c83630
RH
 4977 VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
 4978 VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
4979 ...
4980
4981 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4982 (the order of the data-refs in the output of vect_permute_load_chain
4983 corresponds to the order of scalar stmts in the interleaving chain - see
4984 the documentation of vect_permute_load_chain()).
4985 The generation of permutation stmts and recording them in
0d0293ac 4986 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4987
b8698a0f 4988 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
4989 permutation stmts above are created for every copy. The result vector
4990 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4991 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
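 /* Illustrative sketch only (not taken from the GCC sources): for a
 group of size 2 and nunits == 4, with memory laid out as
 a0 b0 a1 b1 a2 b2 a3 b3, the loads and permutes are

 vx0 = {a0, b0, a1, b1}
 vx1 = {a2, b2, a3, b3}
 va = VEC_PERM_EXPR <vx0, vx1, {0, 2, 4, 6}> (= {a0, a1, a2, a3})
 vb = VEC_PERM_EXPR <vx0, vx1, {1, 3, 5, 7}> (= {b0, b1, b2, b3})

 va is recorded in the STMT_VINFO_VEC_STMT of the scalar load of the
 a's, and vb in that of the b's. */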
ebfd146a
IR
4992
4993 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4994 on a target that supports unaligned accesses (dr_unaligned_supported)
4995 we generate the following code:
4996 p = initial_addr;
4997 indx = 0;
4998 loop {
4999 p = p + indx * vectype_size;
5000 vec_dest = *(p);
5001 indx = indx + 1;
5002 }
5003
5004 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 5005 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
5006 then generate the following code, in which the data in each iteration is
5007 obtained by two vector loads, one from the previous iteration, and one
5008 from the current iteration:
5009 p1 = initial_addr;
5010 msq_init = *(floor(p1))
5011 p2 = initial_addr + VS - 1;
5012 realignment_token = call target_builtin;
5013 indx = 0;
5014 loop {
5015 p2 = p2 + indx * vectype_size
5016 lsq = *(floor(p2))
5017 vec_dest = realign_load (msq, lsq, realignment_token)
5018 indx = indx + 1;
5019 msq = lsq;
5020 } */
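 /* Illustrative sketch only (not taken from the GCC sources): with
 16-byte vectors and a pointer p that is misaligned by 4 bytes
 (p == A + 4 for some aligned address A), the scheme above loads
 msq from floor(p) == A (bytes p-4 .. p+11) and lsq from
 floor(p + 15) == A + 16 (bytes p+12 .. p+27); realign_load then
 concatenates the two and, steered by the realignment token,
 extracts the 16 bytes that start exactly at p. */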
5021
5022 /* If the misalignment remains the same throughout the execution of the
5023 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 5024 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
5025 This can only occur when vectorizing memory accesses in the inner-loop
5026 nested within an outer-loop that is being vectorized. */
5027
d1e4b493 5028 if (nested_in_vect_loop
211bea38 5029 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
5030 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5031 {
5032 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5033 compute_in_loop = true;
5034 }
5035
5036 if ((alignment_support_scheme == dr_explicit_realign_optimized
5037 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 5038 && !compute_in_loop)
ebfd146a
IR
5039 {
5040 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5041 alignment_support_scheme, NULL_TREE,
5042 &at_loop);
5043 if (alignment_support_scheme == dr_explicit_realign_optimized)
5044 {
5045 phi = SSA_NAME_DEF_STMT (msq);
5046 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5047 }
5048 }
5049 else
5050 at_loop = loop;
5051
a1e53f3f
L
5052 if (negative)
5053 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5054
272c6793
RS
5055 if (load_lanes_p)
5056 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5057 else
5058 aggr_type = vectype;
5059
ebfd146a
IR
5060 prev_stmt_info = NULL;
5061 for (j = 0; j < ncopies; j++)
b8698a0f 5062 {
272c6793 5063 /* 1. Create the vector or array pointer update chain. */
ebfd146a 5064 if (j == 0)
74bf76ed
JJ
5065 {
5066 bool simd_lane_access_p
5067 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5068 if (simd_lane_access_p
5069 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5070 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5071 && integer_zerop (DR_OFFSET (first_dr))
5072 && integer_zerop (DR_INIT (first_dr))
5073 && alias_sets_conflict_p (get_alias_set (aggr_type),
5074 get_alias_set (DR_REF (first_dr)))
5075 && (alignment_support_scheme == dr_aligned
5076 || alignment_support_scheme == dr_unaligned_supported))
5077 {
5078 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5079 dataref_offset = build_int_cst (reference_alias_ptr_type
5080 (DR_REF (first_dr)), 0);
8928eff3 5081 inv_p = false;
74bf76ed
JJ
5082 }
5083 else
5084 dataref_ptr
5085 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5086 offset, &dummy, gsi, &ptr_incr,
5087 simd_lane_access_p, &inv_p);
5088 }
5089 else if (dataref_offset)
5090 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5091 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5092 else
272c6793
RS
5093 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5094 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5095
0d0293ac 5096 if (grouped_load || slp_perm)
9771b263 5097 dr_chain.create (vec_num);
5ce1ee7f 5098
272c6793 5099 if (load_lanes_p)
ebfd146a 5100 {
272c6793
RS
5101 tree vec_array;
5102
5103 vec_array = create_vector_array (vectype, vec_num);
5104
5105 /* Emit:
5106 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5107 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5108 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5109 gimple_call_set_lhs (new_stmt, vec_array);
5110 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 5111
272c6793
RS
5112 /* Extract each vector into an SSA_NAME. */
5113 for (i = 0; i < vec_num; i++)
ebfd146a 5114 {
272c6793
RS
5115 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5116 vec_array, i);
9771b263 5117 dr_chain.quick_push (new_temp);
272c6793
RS
5118 }
5119
5120 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 5121 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
5122 }
5123 else
5124 {
5125 for (i = 0; i < vec_num; i++)
5126 {
5127 if (i > 0)
5128 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5129 stmt, NULL_TREE);
5130
5131 /* 2. Create the vector-load in the loop. */
5132 switch (alignment_support_scheme)
5133 {
5134 case dr_aligned:
5135 case dr_unaligned_supported:
be1ac4ec 5136 {
644ffefd
MJ
5137 unsigned int align, misalign;
5138
272c6793
RS
5139 data_ref
5140 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
5141 dataref_offset
5142 ? dataref_offset
5143 : build_int_cst (reference_alias_ptr_type
5144 (DR_REF (first_dr)), 0));
644ffefd 5145 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
5146 if (alignment_support_scheme == dr_aligned)
5147 {
5148 gcc_assert (aligned_access_p (first_dr));
644ffefd 5149 misalign = 0;
272c6793
RS
5150 }
5151 else if (DR_MISALIGNMENT (first_dr) == -1)
5152 {
5153 TREE_TYPE (data_ref)
5154 = build_aligned_type (TREE_TYPE (data_ref),
5155 TYPE_ALIGN (elem_type));
644ffefd
MJ
5156 align = TYPE_ALIGN_UNIT (elem_type);
5157 misalign = 0;
272c6793
RS
5158 }
5159 else
5160 {
5161 TREE_TYPE (data_ref)
5162 = build_aligned_type (TREE_TYPE (data_ref),
5163 TYPE_ALIGN (elem_type));
644ffefd 5164 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5165 }
74bf76ed
JJ
5166 if (dataref_offset == NULL_TREE)
5167 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5168 align, misalign);
272c6793 5169 break;
be1ac4ec 5170 }
272c6793 5171 case dr_explicit_realign:
267d3070 5172 {
272c6793
RS
5173 tree ptr, bump;
5174 tree vs_minus_1;
5175
5176 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5177
5178 if (compute_in_loop)
5179 msq = vect_setup_realignment (first_stmt, gsi,
5180 &realignment_token,
5181 dr_explicit_realign,
5182 dataref_ptr, NULL);
5183
070ecdfd 5184 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5185 new_stmt = gimple_build_assign_with_ops
070ecdfd 5186 (BIT_AND_EXPR, ptr, dataref_ptr,
272c6793
RS
5187 build_int_cst
5188 (TREE_TYPE (dataref_ptr),
5189 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5191 data_ref
5192 = build2 (MEM_REF, vectype, ptr,
5193 build_int_cst (reference_alias_ptr_type
5194 (DR_REF (first_dr)), 0));
5195 vec_dest = vect_create_destination_var (scalar_dest,
5196 vectype);
5197 new_stmt = gimple_build_assign (vec_dest, data_ref);
5198 new_temp = make_ssa_name (vec_dest, new_stmt);
5199 gimple_assign_set_lhs (new_stmt, new_temp);
5200 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5201 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5203 msq = new_temp;
5204
5205 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5206 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
5207 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5208 new_stmt = gimple_build_assign_with_ops
5209 (BIT_AND_EXPR, NULL_TREE, ptr,
5210 build_int_cst
5211 (TREE_TYPE (ptr),
5212 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5213 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
5214 gimple_assign_set_lhs (new_stmt, ptr);
5215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5216 data_ref
5217 = build2 (MEM_REF, vectype, ptr,
5218 build_int_cst (reference_alias_ptr_type
5219 (DR_REF (first_dr)), 0));
5220 break;
267d3070 5221 }
272c6793 5222 case dr_explicit_realign_optimized:
070ecdfd 5223 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5224 new_stmt = gimple_build_assign_with_ops
070ecdfd 5225 (BIT_AND_EXPR, new_temp, dataref_ptr,
272c6793
RS
5226 build_int_cst
5227 (TREE_TYPE (dataref_ptr),
5228 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5230 data_ref
5231 = build2 (MEM_REF, vectype, new_temp,
5232 build_int_cst (reference_alias_ptr_type
5233 (DR_REF (first_dr)), 0));
5234 break;
5235 default:
5236 gcc_unreachable ();
5237 }
ebfd146a 5238 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5239 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
5240 new_temp = make_ssa_name (vec_dest, new_stmt);
5241 gimple_assign_set_lhs (new_stmt, new_temp);
5242 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5243
272c6793
RS
5244 /* 3. Handle explicit realignment if necessary/supported.
5245 Create in loop:
5246 vec_dest = realign_load (msq, lsq, realignment_token) */
5247 if (alignment_support_scheme == dr_explicit_realign_optimized
5248 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5249 {
272c6793
RS
5250 lsq = gimple_assign_lhs (new_stmt);
5251 if (!realignment_token)
5252 realignment_token = dataref_ptr;
5253 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5254 new_stmt
73804b12
RG
5255 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5256 vec_dest, msq, lsq,
5257 realignment_token);
272c6793
RS
5258 new_temp = make_ssa_name (vec_dest, new_stmt);
5259 gimple_assign_set_lhs (new_stmt, new_temp);
5260 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5261
5262 if (alignment_support_scheme == dr_explicit_realign_optimized)
5263 {
5264 gcc_assert (phi);
5265 if (i == vec_num - 1 && j == ncopies - 1)
5266 add_phi_arg (phi, lsq,
5267 loop_latch_edge (containing_loop),
9e227d60 5268 UNKNOWN_LOCATION);
272c6793
RS
5269 msq = lsq;
5270 }
ebfd146a 5271 }
ebfd146a 5272
59fd17e3
RB
5273 /* 4. Handle invariant-load. */
5274 if (inv_p && !bb_vinfo)
5275 {
5276 gimple_stmt_iterator gsi2 = *gsi;
5277 gcc_assert (!grouped_load);
5278 gsi_next (&gsi2);
5279 new_temp = vect_init_vector (stmt, scalar_dest,
5280 vectype, &gsi2);
5281 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5282 }
5283
272c6793
RS
5284 if (negative)
5285 {
aec7ae7d
JJ
5286 tree perm_mask = perm_mask_for_reverse (vectype);
5287 new_temp = permute_vec_elements (new_temp, new_temp,
5288 perm_mask, stmt, gsi);
ebfd146a
IR
5289 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5290 }
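 /* Illustrative sketch only (not taken from the GCC sources): for
 a negative step and nunits == 4 the load above fetched
 {a3, a2, a1, a0} in memory order; perm_mask_for_reverse supplies
 the mask {3, 2, 1, 0}, so the permute yields {a0, a1, a2, a3},
 the order in which the scalar loop consumes the elements. */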
267d3070 5291
272c6793 5292 /* Collect vector loads and later create their permutation in
0d0293ac
MM
5293 vect_transform_grouped_load (). */
5294 if (grouped_load || slp_perm)
9771b263 5295 dr_chain.quick_push (new_temp);
267d3070 5296
272c6793
RS
5297 /* Store vector loads in the corresponding SLP_NODE. */
5298 if (slp && !slp_perm)
9771b263 5299 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5300 }
a64b9c26
RB
5301 /* Bump the vector pointer to account for a gap. */
5302 if (slp && group_gap != 0)
5303 {
5304 tree bump = size_binop (MULT_EXPR,
5305 TYPE_SIZE_UNIT (elem_type),
5306 size_int (group_gap));
5307 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5308 stmt, bump);
5309 }
ebfd146a
IR
5310 }
5311
5312 if (slp && !slp_perm)
5313 continue;
5314
5315 if (slp_perm)
5316 {
01d8bf07 5317 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
ebfd146a
IR
5318 slp_node_instance, false))
5319 {
9771b263 5320 dr_chain.release ();
ebfd146a
IR
5321 return false;
5322 }
5323 }
5324 else
5325 {
0d0293ac 5326 if (grouped_load)
ebfd146a 5327 {
272c6793 5328 if (!load_lanes_p)
0d0293ac 5329 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5330 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5331 }
5332 else
5333 {
5334 if (j == 0)
5335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5336 else
5337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5338 prev_stmt_info = vinfo_for_stmt (new_stmt);
5339 }
5340 }
9771b263 5341 dr_chain.release ();
ebfd146a
IR
5342 }
5343
ebfd146a
IR
5344 return true;
5345}
5346
5347/* Function vect_is_simple_cond.
b8698a0f 5348
ebfd146a
IR
5349 Input:
5350 LOOP - the loop that is being vectorized.
5351 COND - Condition that is checked for simple use.
5352
e9e1d143
RG
5353 Output:
5354 *COMP_VECTYPE - the vector type for the comparison.
5355
ebfd146a
IR
5356 Returns whether a COND can be vectorized. Checks whether
 5357 condition operands are supportable using vect_is_simple_use_1. */
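 /* Illustrative sketch only (not taken from the GCC sources): for

 d = a < b ? x : y;

 the COND passed in is "a < b"; each operand is checked with
 vect_is_simple_use_1, and if one of them is an SSA_NAME with an
 internal vector definition, its vector type is returned in
 *COMP_VECTYPE. */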
5358
87aab9b2 5359static bool
24ee1384
IR
5360vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5361 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
5362{
5363 tree lhs, rhs;
5364 tree def;
5365 enum vect_def_type dt;
e9e1d143 5366 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
5367
5368 if (!COMPARISON_CLASS_P (cond))
5369 return false;
5370
5371 lhs = TREE_OPERAND (cond, 0);
5372 rhs = TREE_OPERAND (cond, 1);
5373
5374 if (TREE_CODE (lhs) == SSA_NAME)
5375 {
5376 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
5377 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5378 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
5379 return false;
5380 }
5381 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5382 && TREE_CODE (lhs) != FIXED_CST)
5383 return false;
5384
5385 if (TREE_CODE (rhs) == SSA_NAME)
5386 {
5387 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
5388 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5389 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
5390 return false;
5391 }
f7e531cf 5392 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
5393 && TREE_CODE (rhs) != FIXED_CST)
5394 return false;
5395
e9e1d143 5396 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
5397 return true;
5398}
5399
5400/* vectorizable_condition.
5401
b8698a0f
L
 5402 Check if STMT is a conditional modify expression that can be vectorized.
5403 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5404 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
5405 at GSI.
5406
 5407 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 5408 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 5409 the else clause if it is 2).
ebfd146a
IR
5410
5411 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
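 /* Illustrative sketch only (not taken from the GCC sources): a scalar
 statement

 x = a > b ? c : d;

 is vectorized as roughly

 vcmp = va > vb;
 vx = VEC_COND_EXPR <vcmp, vc, vd>;

 which is what the build2/build3 calls in the transformation code
 below construct. */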
5412
4bbe8262 5413bool
ebfd146a 5414vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
5415 gimple *vec_stmt, tree reduc_def, int reduc_index,
5416 slp_tree slp_node)
ebfd146a
IR
5417{
5418 tree scalar_dest = NULL_TREE;
5419 tree vec_dest = NULL_TREE;
ebfd146a
IR
5420 tree cond_expr, then_clause, else_clause;
5421 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5422 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5423 tree comp_vectype = NULL_TREE;
ff802fa1
IR
5424 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5425 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
5426 tree vec_compare, vec_cond_expr;
5427 tree new_temp;
5428 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5429 tree def;
a855b1b1 5430 enum vect_def_type dt, dts[4];
ebfd146a 5431 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5432 int ncopies;
ebfd146a 5433 enum tree_code code;
a855b1b1 5434 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
5435 int i, j;
5436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
5437 vec<tree> vec_oprnds0 = vNULL;
5438 vec<tree> vec_oprnds1 = vNULL;
5439 vec<tree> vec_oprnds2 = vNULL;
5440 vec<tree> vec_oprnds3 = vNULL;
74946978 5441 tree vec_cmp_type;
b8698a0f 5442
f7e531cf
IR
5443 if (slp_node || PURE_SLP_STMT (stmt_info))
5444 ncopies = 1;
5445 else
5446 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5447
ebfd146a 5448 gcc_assert (ncopies >= 1);
a855b1b1 5449 if (reduc_index && ncopies > 1)
ebfd146a
IR
5450 return false; /* FORNOW */
5451
f7e531cf
IR
5452 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5453 return false;
5454
5455 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5456 return false;
5457
4bbe8262
IR
5458 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5459 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5460 && reduc_def))
ebfd146a
IR
5461 return false;
5462
ebfd146a 5463 /* FORNOW: not yet supported. */
b8698a0f 5464 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5465 {
73fbfcad 5466 if (dump_enabled_p ())
78c60e3d 5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5468 "value used after loop.\n");
ebfd146a
IR
5469 return false;
5470 }
5471
5472 /* Is vectorizable conditional operation? */
5473 if (!is_gimple_assign (stmt))
5474 return false;
5475
5476 code = gimple_assign_rhs_code (stmt);
5477
5478 if (code != COND_EXPR)
5479 return false;
5480
4e71066d
RG
5481 cond_expr = gimple_assign_rhs1 (stmt);
5482 then_clause = gimple_assign_rhs2 (stmt);
5483 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5484
24ee1384
IR
5485 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5486 &comp_vectype)
e9e1d143 5487 || !comp_vectype)
ebfd146a
IR
5488 return false;
5489
5490 if (TREE_CODE (then_clause) == SSA_NAME)
5491 {
5492 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5493 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5494 &then_def_stmt, &def, &dt))
5495 return false;
5496 }
b8698a0f 5497 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
5498 && TREE_CODE (then_clause) != REAL_CST
5499 && TREE_CODE (then_clause) != FIXED_CST)
5500 return false;
5501
5502 if (TREE_CODE (else_clause) == SSA_NAME)
5503 {
5504 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5505 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5506 &else_def_stmt, &def, &dt))
5507 return false;
5508 }
b8698a0f 5509 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
5510 && TREE_CODE (else_clause) != REAL_CST
5511 && TREE_CODE (else_clause) != FIXED_CST)
5512 return false;
5513
74946978
MP
5514 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
 5515 /* The result of a vector comparison should be of signed integer type. */
5516 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5517 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5518 if (vec_cmp_type == NULL_TREE)
5519 return false;
784fb9b3 5520
b8698a0f 5521 if (!vec_stmt)
ebfd146a
IR
5522 {
5523 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5524 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5525 }
5526
f7e531cf
IR
5527 /* Transform. */
5528
5529 if (!slp_node)
5530 {
9771b263
DN
5531 vec_oprnds0.create (1);
5532 vec_oprnds1.create (1);
5533 vec_oprnds2.create (1);
5534 vec_oprnds3.create (1);
f7e531cf 5535 }
ebfd146a
IR
5536
5537 /* Handle def. */
5538 scalar_dest = gimple_assign_lhs (stmt);
5539 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5540
5541 /* Handle cond expr. */
a855b1b1
MM
5542 for (j = 0; j < ncopies; j++)
5543 {
f7e531cf 5544 gimple new_stmt = NULL;
a855b1b1
MM
5545 if (j == 0)
5546 {
f7e531cf
IR
5547 if (slp_node)
5548 {
9771b263
DN
5549 vec<tree> ops;
5550 ops.create (4);
37b5ec8f 5551 vec<vec<tree> > vec_defs;
9771b263
DN
5552
5553 vec_defs.create (4);
5554 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5555 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5556 ops.safe_push (then_clause);
5557 ops.safe_push (else_clause);
f7e531cf 5558 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
37b5ec8f
JJ
5559 vec_oprnds3 = vec_defs.pop ();
5560 vec_oprnds2 = vec_defs.pop ();
5561 vec_oprnds1 = vec_defs.pop ();
5562 vec_oprnds0 = vec_defs.pop ();
f7e531cf 5563
9771b263
DN
5564 ops.release ();
5565 vec_defs.release ();
f7e531cf
IR
5566 }
5567 else
5568 {
5569 gimple gtemp;
5570 vec_cond_lhs =
a855b1b1
MM
5571 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5572 stmt, NULL);
24ee1384
IR
5573 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5574 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
5575
5576 vec_cond_rhs =
5577 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5578 stmt, NULL);
24ee1384
IR
5579 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5580 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
5581 if (reduc_index == 1)
5582 vec_then_clause = reduc_def;
5583 else
5584 {
5585 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5586 stmt, NULL);
24ee1384 5587 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
5588 NULL, &gtemp, &def, &dts[2]);
5589 }
5590 if (reduc_index == 2)
5591 vec_else_clause = reduc_def;
5592 else
5593 {
5594 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5595 stmt, NULL);
24ee1384 5596 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5597 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5598 }
a855b1b1
MM
5599 }
5600 }
5601 else
5602 {
f7e531cf 5603 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5604 vec_oprnds0.pop ());
f7e531cf 5605 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5606 vec_oprnds1.pop ());
a855b1b1 5607 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5608 vec_oprnds2.pop ());
a855b1b1 5609 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5610 vec_oprnds3.pop ());
f7e531cf
IR
5611 }
5612
5613 if (!slp_node)
5614 {
9771b263
DN
5615 vec_oprnds0.quick_push (vec_cond_lhs);
5616 vec_oprnds1.quick_push (vec_cond_rhs);
5617 vec_oprnds2.quick_push (vec_then_clause);
5618 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
5619 }
5620
9dc3f7de 5621 /* Arguments are ready. Create the new vector stmt. */
9771b263 5622 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5623 {
9771b263
DN
5624 vec_cond_rhs = vec_oprnds1[i];
5625 vec_then_clause = vec_oprnds2[i];
5626 vec_else_clause = vec_oprnds3[i];
a855b1b1 5627
784fb9b3
JJ
5628 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5629 vec_cond_lhs, vec_cond_rhs);
f7e531cf
IR
5630 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5631 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5632
f7e531cf
IR
5633 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5634 new_temp = make_ssa_name (vec_dest, new_stmt);
5635 gimple_assign_set_lhs (new_stmt, new_temp);
5636 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5637 if (slp_node)
9771b263 5638 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
5639 }
5640
5641 if (slp_node)
5642 continue;
5643
5644 if (j == 0)
5645 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5646 else
5647 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5648
5649 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5650 }
b8698a0f 5651
9771b263
DN
5652 vec_oprnds0.release ();
5653 vec_oprnds1.release ();
5654 vec_oprnds2.release ();
5655 vec_oprnds3.release ();
f7e531cf 5656
ebfd146a
IR
5657 return true;
5658}
5659
5660
8644a673 5661/* Make sure the statement is vectorizable. */
ebfd146a
IR
5662
5663bool
a70d6342 5664vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5665{
8644a673 5666 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5667 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5668 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5669 bool ok;
a70d6342 5670 tree scalar_type, vectype;
363477c0
JJ
5671 gimple pattern_stmt;
5672 gimple_seq pattern_def_seq;
ebfd146a 5673
73fbfcad 5674 if (dump_enabled_p ())
ebfd146a 5675 {
78c60e3d
SS
5676 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5678 dump_printf (MSG_NOTE, "\n");
8644a673 5679 }
ebfd146a 5680
1825a1f3 5681 if (gimple_has_volatile_ops (stmt))
b8698a0f 5682 {
73fbfcad 5683 if (dump_enabled_p ())
78c60e3d 5684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5685 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
5686
5687 return false;
5688 }
b8698a0f
L
5689
5690 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
5691 to include:
5692 - the COND_EXPR which is the loop exit condition
5693 - any LABEL_EXPRs in the loop
b8698a0f 5694 - computations that are used only for array indexing or loop control.
8644a673 5695 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5696 instance, therefore, all the statements are relevant.
ebfd146a 5697
d092494c 5698 Pattern statement needs to be analyzed instead of the original statement
83197f37 5699 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
 5700 statements. In basic blocks we are called from some SLP instance
 5701 traversal; there we don't analyze pattern stmts separately, since the
 5702 pattern stmts will already be part of the SLP instance. */
83197f37
IR
5703
5704 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5705 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5706 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5707 {
9d5e7640 5708 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5709 && pattern_stmt
9d5e7640
IR
5710 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5711 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5712 {
83197f37 5713 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
5714 stmt = pattern_stmt;
5715 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5716 if (dump_enabled_p ())
9d5e7640 5717 {
78c60e3d
SS
5718 dump_printf_loc (MSG_NOTE, vect_location,
5719 "==> examining pattern statement: ");
5720 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5721 dump_printf (MSG_NOTE, "\n");
9d5e7640
IR
5722 }
5723 }
5724 else
5725 {
73fbfcad 5726 if (dump_enabled_p ())
e645e942 5727 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 5728
9d5e7640
IR
5729 return true;
5730 }
8644a673 5731 }
83197f37 5732 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5733 && node == NULL
83197f37
IR
5734 && pattern_stmt
5735 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5736 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5737 {
5738 /* Analyze PATTERN_STMT too. */
73fbfcad 5739 if (dump_enabled_p ())
83197f37 5740 {
78c60e3d
SS
5741 dump_printf_loc (MSG_NOTE, vect_location,
5742 "==> examining pattern statement: ");
5743 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5744 dump_printf (MSG_NOTE, "\n");
83197f37
IR
5745 }
5746
5747 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5748 return false;
5749 }
ebfd146a 5750
1107f3ae 5751 if (is_pattern_stmt_p (stmt_info)
079c527f 5752 && node == NULL
363477c0 5753 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5754 {
363477c0 5755 gimple_stmt_iterator si;
1107f3ae 5756
363477c0
JJ
5757 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5758 {
5759 gimple pattern_def_stmt = gsi_stmt (si);
5760 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5761 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5762 {
5763 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5764 if (dump_enabled_p ())
363477c0 5765 {
78c60e3d
SS
5766 dump_printf_loc (MSG_NOTE, vect_location,
5767 "==> examining pattern def statement: ");
5768 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
e645e942 5769 dump_printf (MSG_NOTE, "\n");
363477c0 5770 }
1107f3ae 5771
363477c0
JJ
5772 if (!vect_analyze_stmt (pattern_def_stmt,
5773 need_to_vectorize, node))
5774 return false;
5775 }
5776 }
5777 }
1107f3ae 5778
8644a673
IR
5779 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5780 {
5781 case vect_internal_def:
5782 break;
ebfd146a 5783
8644a673 5784 case vect_reduction_def:
7c5222ff 5785 case vect_nested_cycle:
a70d6342 5786 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5787 || relevance == vect_used_in_outer_by_reduction
a70d6342 5788 || relevance == vect_unused_in_scope));
8644a673
IR
5789 break;
5790
5791 case vect_induction_def:
5792 case vect_constant_def:
5793 case vect_external_def:
5794 case vect_unknown_def_type:
5795 default:
5796 gcc_unreachable ();
5797 }
ebfd146a 5798
a70d6342
IR
5799 if (bb_vinfo)
5800 {
5801 gcc_assert (PURE_SLP_STMT (stmt_info));
5802
b690cc0f 5803 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5804 if (dump_enabled_p ())
a70d6342 5805 {
78c60e3d
SS
5806 dump_printf_loc (MSG_NOTE, vect_location,
5807 "get vectype for scalar type: ");
5808 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 5809 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
5810 }
5811
5812 vectype = get_vectype_for_scalar_type (scalar_type);
5813 if (!vectype)
5814 {
73fbfcad 5815 if (dump_enabled_p ())
a70d6342 5816 {
78c60e3d
SS
5817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5818 "not SLPed: unsupported data-type ");
5819 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5820 scalar_type);
e645e942 5821 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
a70d6342
IR
5822 }
5823 return false;
5824 }
5825
73fbfcad 5826 if (dump_enabled_p ())
a70d6342 5827 {
78c60e3d
SS
5828 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5829 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 5830 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
5831 }
5832
5833 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5834 }
5835
8644a673 5836 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5837 {
8644a673
IR
5838 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5839 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5840 *need_to_vectorize = true;
ebfd146a
IR
5841 }
5842
8644a673 5843 ok = true;
b8698a0f 5844 if (!bb_vinfo
a70d6342
IR
5845 && (STMT_VINFO_RELEVANT_P (stmt_info)
5846 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5847 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5848 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
5849 || vectorizable_operation (stmt, NULL, NULL, NULL)
5850 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5851 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5852 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5853 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5854 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5855 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
5856 else
5857 {
5858 if (bb_vinfo)
4a00c761
JJ
5859 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5860 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5861 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
5862 || vectorizable_assignment (stmt, NULL, NULL, node)
5863 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5864 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
5865 || vectorizable_store (stmt, NULL, NULL, node)
5866 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5867 }
8644a673
IR
5868
5869 if (!ok)
ebfd146a 5870 {
73fbfcad 5871 if (dump_enabled_p ())
8644a673 5872 {
78c60e3d
SS
5873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5874 "not vectorized: relevant stmt not ");
5875 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5876 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5877 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5878 }
b8698a0f 5879
ebfd146a
IR
5880 return false;
5881 }
5882
a70d6342
IR
5883 if (bb_vinfo)
5884 return true;
5885
8644a673
IR
5886 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5887 need extra handling, except for vectorizable reductions. */
5888 if (STMT_VINFO_LIVE_P (stmt_info)
5889 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5890 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5891
8644a673 5892 if (!ok)
ebfd146a 5893 {
73fbfcad 5894 if (dump_enabled_p ())
8644a673 5895 {
78c60e3d
SS
5896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5897 "not vectorized: live stmt not ");
5898 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5900 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5901 }
b8698a0f 5902
8644a673 5903 return false;
ebfd146a
IR
5904 }
5905
ebfd146a
IR
5906 return true;
5907}
5908
5909
5910/* Function vect_transform_stmt.
5911
 5912 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5913
5914bool
5915vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5916 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
5917 slp_instance slp_node_instance)
5918{
5919 bool is_store = false;
5920 gimple vec_stmt = NULL;
5921 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5922 bool done;
ebfd146a
IR
5923
5924 switch (STMT_VINFO_TYPE (stmt_info))
5925 {
5926 case type_demotion_vec_info_type:
ebfd146a 5927 case type_promotion_vec_info_type:
ebfd146a
IR
5928 case type_conversion_vec_info_type:
5929 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5930 gcc_assert (done);
5931 break;
5932
5933 case induc_vec_info_type:
5934 gcc_assert (!slp_node);
5935 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5936 gcc_assert (done);
5937 break;
5938
9dc3f7de
IR
5939 case shift_vec_info_type:
5940 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5941 gcc_assert (done);
5942 break;
5943
ebfd146a
IR
5944 case op_vec_info_type:
5945 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5946 gcc_assert (done);
5947 break;
5948
5949 case assignment_vec_info_type:
5950 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5951 gcc_assert (done);
5952 break;
5953
5954 case load_vec_info_type:
b8698a0f 5955 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5956 slp_node_instance);
5957 gcc_assert (done);
5958 break;
5959
5960 case store_vec_info_type:
5961 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5962 gcc_assert (done);
0d0293ac 5963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
5964 {
5965 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5966 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 5967 one are skipped, and their stmt_vec_info must not be freed
 5968 in the meantime. */
0d0293ac 5969 *grouped_store = true;
ebfd146a
IR
5970 if (STMT_VINFO_VEC_STMT (stmt_info))
5971 is_store = true;
5972 }
5973 else
5974 is_store = true;
5975 break;
5976
5977 case condition_vec_info_type:
f7e531cf 5978 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
5979 gcc_assert (done);
5980 break;
5981
5982 case call_vec_info_type:
190c2236 5983 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5984 stmt = gsi_stmt (*gsi);
ebfd146a
IR
5985 break;
5986
5987 case reduc_vec_info_type:
b5aeb3bb 5988 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
5989 gcc_assert (done);
5990 break;
5991
5992 default:
5993 if (!STMT_VINFO_LIVE_P (stmt_info))
5994 {
73fbfcad 5995 if (dump_enabled_p ())
78c60e3d 5996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5997 "stmt not supported.\n");
ebfd146a
IR
5998 gcc_unreachable ();
5999 }
6000 }
6001
6002 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
6003 is being vectorized, but outside the immediately enclosing loop. */
6004 if (vec_stmt
a70d6342
IR
6005 && STMT_VINFO_LOOP_VINFO (stmt_info)
6006 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
6007 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
6008 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
6009 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 6010 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 6011 vect_used_in_outer_by_reduction))
ebfd146a 6012 {
a70d6342
IR
6013 struct loop *innerloop = LOOP_VINFO_LOOP (
6014 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
6015 imm_use_iterator imm_iter;
6016 use_operand_p use_p;
6017 tree scalar_dest;
6018 gimple exit_phi;
6019
73fbfcad 6020 if (dump_enabled_p ())
78c60e3d 6021 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6022 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
6023
 6024 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6025 (to be used when vectorizing outer-loop stmts that use the DEF of
6026 STMT). */
6027 if (gimple_code (stmt) == GIMPLE_PHI)
6028 scalar_dest = PHI_RESULT (stmt);
6029 else
6030 scalar_dest = gimple_assign_lhs (stmt);
6031
6032 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6033 {
6034 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6035 {
6036 exit_phi = USE_STMT (use_p);
6037 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6038 }
6039 }
6040 }
6041
6042 /* Handle stmts whose DEF is used outside the loop-nest that is
6043 being vectorized. */
6044 if (STMT_VINFO_LIVE_P (stmt_info)
6045 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6046 {
6047 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6048 gcc_assert (done);
6049 }
6050
6051 if (vec_stmt)
83197f37 6052 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 6053
b8698a0f 6054 return is_store;
ebfd146a
IR
6055}
6056
6057
b8698a0f 6058/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
6059 stmt_vec_info. */
6060
6061void
6062vect_remove_stores (gimple first_stmt)
6063{
6064 gimple next = first_stmt;
6065 gimple tmp;
6066 gimple_stmt_iterator next_si;
6067
6068 while (next)
6069 {
78048b1c
JJ
6070 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6071
6072 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6073 if (is_pattern_stmt_p (stmt_info))
6074 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
6075 /* Free the attached stmt_vec_info and remove the stmt. */
6076 next_si = gsi_for_stmt (next);
3d3f2249 6077 unlink_stmt_vdef (next);
ebfd146a 6078 gsi_remove (&next_si, true);
3d3f2249 6079 release_defs (next);
ebfd146a
IR
6080 free_stmt_vec_info (next);
6081 next = tmp;
6082 }
6083}
6084
6085
6086/* Function new_stmt_vec_info.
6087
6088 Create and initialize a new stmt_vec_info struct for STMT. */
6089
6090stmt_vec_info
b8698a0f 6091new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6092 bb_vec_info bb_vinfo)
ebfd146a
IR
6093{
6094 stmt_vec_info res;
6095 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6096
6097 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6098 STMT_VINFO_STMT (res) = stmt;
6099 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 6100 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 6101 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
6102 STMT_VINFO_LIVE_P (res) = false;
6103 STMT_VINFO_VECTYPE (res) = NULL;
6104 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 6105 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
6106 STMT_VINFO_IN_PATTERN_P (res) = false;
6107 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 6108 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
6109 STMT_VINFO_DATA_REF (res) = NULL;
6110
6111 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6112 STMT_VINFO_DR_OFFSET (res) = NULL;
6113 STMT_VINFO_DR_INIT (res) = NULL;
6114 STMT_VINFO_DR_STEP (res) = NULL;
6115 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6116
6117 if (gimple_code (stmt) == GIMPLE_PHI
6118 && is_loop_header_bb_p (gimple_bb (stmt)))
6119 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6120 else
8644a673
IR
6121 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6122
9771b263 6123 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 6124 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
6125 GROUP_FIRST_ELEMENT (res) = NULL;
6126 GROUP_NEXT_ELEMENT (res) = NULL;
6127 GROUP_SIZE (res) = 0;
6128 GROUP_STORE_COUNT (res) = 0;
6129 GROUP_GAP (res) = 0;
6130 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
6131
6132 return res;
6133}
6134
6135
 6136/* Create the global vector that maps stmts to their stmt_vec_info. */
6137
6138void
6139init_stmt_vec_info_vec (void)
6140{
9771b263
DN
6141 gcc_assert (!stmt_vec_info_vec.exists ());
6142 stmt_vec_info_vec.create (50);
ebfd146a
IR
6143}
6144
6145
 6146/* Free the global stmt_vec_info vector. */
6147
6148void
6149free_stmt_vec_info_vec (void)
6150{
93675444
JJ
6151 unsigned int i;
6152 vec_void_p info;
6153 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6154 if (info != NULL)
6155 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
9771b263
DN
6156 gcc_assert (stmt_vec_info_vec.exists ());
6157 stmt_vec_info_vec.release ();
ebfd146a
IR
6158}
6159
6160
6161/* Free stmt vectorization related info. */
6162
6163void
6164free_stmt_vec_info (gimple stmt)
6165{
6166 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6167
6168 if (!stmt_info)
6169 return;
6170
78048b1c
JJ
6171 /* Check if this statement has a related "pattern stmt"
6172 (introduced by the vectorizer during the pattern recognition
6173 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6174 too. */
6175 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6176 {
6177 stmt_vec_info patt_info
6178 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6179 if (patt_info)
6180 {
363477c0
JJ
6181 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6182 if (seq)
6183 {
6184 gimple_stmt_iterator si;
6185 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6186 free_stmt_vec_info (gsi_stmt (si));
6187 }
78048b1c
JJ
6188 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6189 }
6190 }
6191
9771b263 6192 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
ebfd146a
IR
6193 set_vinfo_for_stmt (stmt, NULL);
6194 free (stmt_info);
6195}
6196
6197
bb67d9c7 6198/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6199
bb67d9c7 6200 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
6201 by the target. */
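 /* Illustrative sketch only (not taken from the GCC sources): on a
 target whose preferred SIMD mode for SImode is V4SImode, a 32-bit
 "int" with SIZE == 0 yields a 4-element vector type; SIZE == 32
 would instead look up a 32-byte, 8-element vector mode and fail if
 the target provides none. */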
6202
bb67d9c7
RG
6203static tree
6204get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
6205{
6206 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6207 enum machine_mode simd_mode;
2f816591 6208 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
6209 int nunits;
6210 tree vectype;
6211
cc4b5170 6212 if (nbytes == 0)
ebfd146a
IR
6213 return NULL_TREE;
6214
48f2e373
RB
6215 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6216 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6217 return NULL_TREE;
6218
7b7b1813
RG
6219 /* For vector types of elements whose mode precision doesn't
 6220 match their type's precision we use an element type of mode
6221 precision. The vectorization routines will have to make sure
48f2e373
RB
6222 they support the proper result truncation/extension.
6223 We also make sure to build vector types with INTEGER_TYPE
6224 component type only. */
6d7971b8 6225 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
6226 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6227 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
6228 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6229 TYPE_UNSIGNED (scalar_type));
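 /* Illustrative sketch only (not taken from the GCC sources): a C
 _Bool has TYPE_PRECISION 1 but an 8-bit mode (QImode), so the
 code above substitutes an 8-bit nonstandard integer type before
 the vector type is built. */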
6d7971b8 6230
ccbf5bb4
RG
6231 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6232 When the component mode passes the above test simply use a type
6233 corresponding to that mode. The theory is that any use that
6234 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 6235 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 6236 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
6237 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6238
6239 /* We can't build a vector type of elements with alignment bigger than
6240 their size. */
dfc2e2ac 6241 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
6242 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6243 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 6244
dfc2e2ac
RB
 6245 /* If we fell back to using the mode, fail if there was
 6246 no scalar type for it. */
6247 if (scalar_type == NULL_TREE)
6248 return NULL_TREE;
6249
bb67d9c7
RG
6250 /* If no size was supplied use the mode the target prefers. Otherwise
6251 lookup a vector mode of the specified size. */
6252 if (size == 0)
6253 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6254 else
6255 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
6256 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6257 if (nunits <= 1)
6258 return NULL_TREE;
ebfd146a
IR
6259
6260 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
6261
6262 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6263 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 6264 return NULL_TREE;
ebfd146a
IR
6265
6266 return vectype;
6267}
6268
bb67d9c7
RG
6269unsigned int current_vector_size;
6270
6271/* Function get_vectype_for_scalar_type.
6272
6273 Returns the vector type corresponding to SCALAR_TYPE as supported
6274 by the target. */
6275
6276tree
6277get_vectype_for_scalar_type (tree scalar_type)
6278{
6279 tree vectype;
6280 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6281 current_vector_size);
6282 if (vectype
6283 && current_vector_size == 0)
6284 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6285 return vectype;
6286}
6287
b690cc0f
RG
6288/* Function get_same_sized_vectype
6289
6290 Returns a vector type corresponding to SCALAR_TYPE of size
6291 VECTOR_TYPE if supported by the target. */
6292
6293tree
bb67d9c7 6294get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6295{
bb67d9c7
RG
6296 return get_vectype_for_scalar_type_and_size
6297 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
6298}
6299
ebfd146a
IR
6300/* Function vect_is_simple_use.
6301
6302 Input:
a70d6342
IR
6303 LOOP_VINFO - the vect info of the loop that is being vectorized.
6304 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6305 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
6306 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6307
6308 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6309 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6310 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6311 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
6312 is the case in reduction/induction computations).
6313 For basic blocks, supportable operands are constants and bb invariants.
6314 For now, operands defined outside the basic block are not supported. */
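 /* Illustrative sketch only (not taken from the GCC sources): in

 for (i = 0; i < n; i++)
 a[i] = b[i] * c + 5;

 the constant "5" is classified as vect_constant_def, "c" (defined
 before the loop) as vect_external_def, and the value loaded from
 "b[i]" as vect_internal_def; a value carried around the loop by a
 PHI comes back as an induction or reduction kind and is handled
 specially by the callers. */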
ebfd146a
IR
6315
6316bool
24ee1384 6317vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6318 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6319 tree *def, enum vect_def_type *dt)
b8698a0f 6320{
ebfd146a
IR
6321 basic_block bb;
6322 stmt_vec_info stmt_vinfo;
a70d6342 6323 struct loop *loop = NULL;
b8698a0f 6324
a70d6342
IR
6325 if (loop_vinfo)
6326 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
6327
6328 *def_stmt = NULL;
6329 *def = NULL_TREE;
b8698a0f 6330
73fbfcad 6331 if (dump_enabled_p ())
ebfd146a 6332 {
78c60e3d
SS
6333 dump_printf_loc (MSG_NOTE, vect_location,
6334 "vect_is_simple_use: operand ");
6335 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 6336 dump_printf (MSG_NOTE, "\n");
ebfd146a 6337 }
b8698a0f 6338
b758f602 6339 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
6340 {
6341 *dt = vect_constant_def;
6342 return true;
6343 }
b8698a0f 6344
ebfd146a
IR
6345 if (is_gimple_min_invariant (operand))
6346 {
6347 *def = operand;
8644a673 6348 *dt = vect_external_def;
ebfd146a
IR
6349 return true;
6350 }
6351
6352 if (TREE_CODE (operand) == PAREN_EXPR)
6353 {
73fbfcad 6354 if (dump_enabled_p ())
e645e942 6355 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
ebfd146a
IR
6356 operand = TREE_OPERAND (operand, 0);
6357 }
b8698a0f 6358
ebfd146a
IR
6359 if (TREE_CODE (operand) != SSA_NAME)
6360 {
73fbfcad 6361 if (dump_enabled_p ())
78c60e3d 6362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6363 "not ssa-name.\n");
ebfd146a
IR
6364 return false;
6365 }
b8698a0f 6366
ebfd146a
IR
6367 *def_stmt = SSA_NAME_DEF_STMT (operand);
6368 if (*def_stmt == NULL)
6369 {
73fbfcad 6370 if (dump_enabled_p ())
78c60e3d 6371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6372 "no def_stmt.\n");
ebfd146a
IR
6373 return false;
6374 }
6375
73fbfcad 6376 if (dump_enabled_p ())
ebfd146a 6377 {
78c60e3d
SS
6378 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6379 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
e645e942 6380 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
6381 }
6382
8644a673 6383 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
6384 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6385 if (gimple_nop_p (*def_stmt))
6386 {
6387 *def = operand;
8644a673 6388 *dt = vect_external_def;
ebfd146a
IR
6389 return true;
6390 }
6391
6392 bb = gimple_bb (*def_stmt);
a70d6342
IR
6393
6394 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6395 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6396 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6397 *dt = vect_external_def;
ebfd146a
IR
6398 else
6399 {
6400 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6401 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6402 }
6403
24ee1384
IR
6404 if (*dt == vect_unknown_def_type
6405 || (stmt
6406 && *dt == vect_double_reduction_def
6407 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6408 {
73fbfcad 6409 if (dump_enabled_p ())
78c60e3d 6410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6411 "Unsupported pattern.\n");
ebfd146a
IR
6412 return false;
6413 }
6414
73fbfcad 6415 if (dump_enabled_p ())
e645e942 6416 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
ebfd146a
IR
6417
6418 switch (gimple_code (*def_stmt))
6419 {
6420 case GIMPLE_PHI:
6421 *def = gimple_phi_result (*def_stmt);
6422 break;
6423
6424 case GIMPLE_ASSIGN:
6425 *def = gimple_assign_lhs (*def_stmt);
6426 break;
6427
6428 case GIMPLE_CALL:
6429 *def = gimple_call_lhs (*def_stmt);
6430 if (*def != NULL)
6431 break;
6432 /* FALLTHRU */
6433 default:
73fbfcad 6434 if (dump_enabled_p ())
78c60e3d 6435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6436 "unsupported defining stmt:\n");
ebfd146a
IR
6437 return false;
6438 }
6439
6440 return true;
6441}
6442
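
A minimal caller sketch (not part of tree-vect-stmts.c; the helper name is
hypothetical, but the vect_is_simple_use signature is the one defined above)
showing how the classification is typically consumed:

/* Hypothetical helper: accept OP only when its definition does not come
   from a previous iteration of the loop being vectorized.  */
static bool
operand_is_invariant_p (tree op, gimple stmt, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  /* Constants and defs from outside the loop are safe to broadcast;
     everything else needs per-iteration handling.  */
  return dt == vect_constant_def || dt == vect_external_def;
}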
b690cc0f
RG
6443/* Function vect_is_simple_use_1.
6444
6445 Same as vect_is_simple_use but also determines the vector operand
6446 type of OPERAND and stores it to *VECTYPE. If the definition of
6447 OPERAND is vect_uninitialized_def, vect_constant_def or
6448 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6449 is responsible for computing the best suited vector type for the
6450 scalar operand. */
6451
6452bool
24ee1384 6453vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
6454 bb_vec_info bb_vinfo, gimple *def_stmt,
6455 tree *def, enum vect_def_type *dt, tree *vectype)
6456{
24ee1384
IR
6457 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6458 def, dt))
b690cc0f
RG
6459 return false;
6460
6461 /* Now get a vector type if the def is internal, otherwise supply
6462 NULL_TREE and leave it up to the caller to figure out a proper
6463 type for the use stmt. */
6464 if (*dt == vect_internal_def
6465 || *dt == vect_induction_def
6466 || *dt == vect_reduction_def
6467 || *dt == vect_double_reduction_def
6468 || *dt == vect_nested_cycle)
6469 {
6470 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
6471
6472 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6473 && !STMT_VINFO_RELEVANT (stmt_info)
6474 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 6475 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 6476
b690cc0f
RG
6477 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6478 gcc_assert (*vectype != NULL_TREE);
6479 }
6480 else if (*dt == vect_uninitialized_def
6481 || *dt == vect_constant_def
6482 || *dt == vect_external_def)
6483 *vectype = NULL_TREE;
6484 else
6485 gcc_unreachable ();
6486
6487 return true;
6488}
6489
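
A companion sketch for the _1 variant (again hypothetical as a whole;
get_vectype_for_scalar_type is the existing tree-vectorizer.h helper),
illustrating the division of labor on *VECTYPE described in the comment:

/* When the def is internal the callee supplies the vector type;
   otherwise the caller derives one from the scalar type of OP.  */
static tree
operand_vectype_sketch (tree op, gimple stmt, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def, vectype;
  enum vect_def_type dt;

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, NULL, &def_stmt,
			     &def, &dt, &vectype))
    return NULL_TREE;
  if (vectype == NULL_TREE)
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  return vectype;
}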
ebfd146a
IR
6490
6491/* Function supportable_widening_operation
6492
b8698a0f
L
6493 Check whether an operation represented by the code CODE is a
6494 widening operation that is supported by the target platform in
b690cc0f
RG
6495 vector form (i.e., when operating on arguments of type VECTYPE_IN
6496 producing a result of type VECTYPE_OUT).
b8698a0f 6497
ebfd146a
IR
6498 Widening operations we currently support are NOP (CONVERT), FLOAT,
6499 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
6500 are supported by the target platform either directly (via vector
6501 tree-codes), or via target builtins.
6502
6503 Output:
b8698a0f
L
6504 - CODE1 and CODE2 are codes of vector operations to be used when
6505 vectorizing the operation, if available.
ebfd146a
IR
6506 - MULTI_STEP_CVT determines the number of required intermediate steps in
6507 case of multi-step conversion (like char->short->int - in that case
6508 MULTI_STEP_CVT will be 1).
b8698a0f
L
6509 - INTERM_TYPES contains the intermediate type required to perform the
6510 widening operation (short in the above example). */
ebfd146a
IR
6511
6512bool
b690cc0f
RG
6513supportable_widening_operation (enum tree_code code, gimple stmt,
6514 tree vectype_out, tree vectype_in,
ebfd146a
IR
6515 enum tree_code *code1, enum tree_code *code2,
6516 int *multi_step_cvt,
9771b263 6517 vec<tree> *interm_types)
ebfd146a
IR
6518{
6519 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6520 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 6521 struct loop *vect_loop = NULL;
ebfd146a 6522 enum machine_mode vec_mode;
81f40b79 6523 enum insn_code icode1, icode2;
ebfd146a 6524 optab optab1, optab2;
b690cc0f
RG
6525 tree vectype = vectype_in;
6526 tree wide_vectype = vectype_out;
ebfd146a 6527 enum tree_code c1, c2;
4a00c761
JJ
6528 int i;
6529 tree prev_type, intermediate_type;
6530 enum machine_mode intermediate_mode, prev_mode;
6531 optab optab3, optab4;
ebfd146a 6532
4a00c761 6533 *multi_step_cvt = 0;
4ef69dfc
IR
6534 if (loop_info)
6535 vect_loop = LOOP_VINFO_LOOP (loop_info);
6536
ebfd146a
IR
6537 switch (code)
6538 {
6539 case WIDEN_MULT_EXPR:
6ae6116f
RH
6540 /* The result of a vectorized widening operation usually requires
6541 two vectors (because the widened results do not fit into one vector).
6542 The vector results would normally be expected to appear in the
6543 same order as in the original scalar computation,
6544 i.e. if 8 results are generated in each vector iteration, they are
6545 to be organized as follows:
6546 vect1: [res1,res2,res3,res4],
6547 vect2: [res5,res6,res7,res8].
6548
6549 However, in the special case that the result of the widening
6550 operation is used in a reduction computation only, the order doesn't
6551 matter (because when vectorizing a reduction we change the order of
6552 the computation). Some targets can take advantage of this and
6553 generate more efficient code. For example, targets like Altivec,
6554 that support widen_mult using a sequence of {mult_even,mult_odd}
6555 generate the following vectors:
6556 vect1: [res1,res3,res5,res7],
6557 vect2: [res2,res4,res6,res8].
6558
6559 When vectorizing outer-loops, we execute the inner-loop sequentially
6560 (each vectorized inner-loop iteration contributes to VF outer-loop
6561 iterations in parallel). We therefore don't allow changing the
6562 order of the computation in the inner-loop during outer-loop
6563 vectorization. */
6564 /* TODO: Another case in which order doesn't *really* matter is when we
6565 widen and then contract again, e.g. (short)((int)x * y >> 8).
6566 Normally, pack_trunc performs an even/odd permute, whereas the
6567 repack from an even/odd expansion would be an interleave, which
6568 would be significantly simpler for e.g. AVX2. */
6569 /* In any case, in order to avoid duplicating the code below, recurse
6570 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6571 are properly set up for the caller. If we fail, we'll continue with
6572 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6573 if (vect_loop
6574 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6575 && !nested_in_vect_loop_p (vect_loop, stmt)
6576 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6577 stmt, vectype_out, vectype_in,
a86ec597
RH
6578 code1, code2, multi_step_cvt,
6579 interm_types))
6ae6116f 6580 return true;
4a00c761
JJ
6581 c1 = VEC_WIDEN_MULT_LO_EXPR;
6582 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
6583 break;
6584
6ae6116f
RH
6585 case VEC_WIDEN_MULT_EVEN_EXPR:
6586 /* Support the recursion induced just above. */
6587 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6588 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6589 break;
6590
36ba4aae 6591 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
6592 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6593 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
6594 break;
6595
ebfd146a 6596 CASE_CONVERT:
4a00c761
JJ
6597 c1 = VEC_UNPACK_LO_EXPR;
6598 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
6599 break;
6600
6601 case FLOAT_EXPR:
4a00c761
JJ
6602 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6603 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
6604 break;
6605
6606 case FIX_TRUNC_EXPR:
6607 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6608 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6609 computing the operation. */
6610 return false;
6611
6612 default:
6613 gcc_unreachable ();
6614 }
6615
6ae6116f 6616 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
4a00c761
JJ
6617 {
6618 enum tree_code ctmp = c1;
6619 c1 = c2;
6620 c2 = ctmp;
6621 }
6622
ebfd146a
IR
6623 if (code == FIX_TRUNC_EXPR)
6624 {
6625 /* The signedness is determined from output operand. */
b690cc0f
RG
6626 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6627 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
6628 }
6629 else
6630 {
6631 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6632 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6633 }
6634
6635 if (!optab1 || !optab2)
6636 return false;
6637
6638 vec_mode = TYPE_MODE (vectype);
947131ba
RS
6639 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6640 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6641 return false;
6642
4a00c761
JJ
6643 *code1 = c1;
6644 *code2 = c2;
6645
6646 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6647 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6648 return true;
6649
b8698a0f 6650 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 6651 types. */
ebfd146a 6652
4a00c761
JJ
6653 prev_type = vectype;
6654 prev_mode = vec_mode;
b8698a0f 6655
4a00c761
JJ
6656 if (!CONVERT_EXPR_CODE_P (code))
6657 return false;
b8698a0f 6658
4a00c761
JJ
6659 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6660 intermediate steps in the promotion sequence.  We try
6661 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6662 not. */
9771b263 6663 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
6664 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6665 {
6666 intermediate_mode = insn_data[icode1].operand[0].mode;
6667 intermediate_type
6668 = lang_hooks.types.type_for_mode (intermediate_mode,
6669 TYPE_UNSIGNED (prev_type));
6670 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6671 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6672
6673 if (!optab3 || !optab4
6674 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6675 || insn_data[icode1].operand[0].mode != intermediate_mode
6676 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6677 || insn_data[icode2].operand[0].mode != intermediate_mode
6678 || ((icode1 = optab_handler (optab3, intermediate_mode))
6679 == CODE_FOR_nothing)
6680 || ((icode2 = optab_handler (optab4, intermediate_mode))
6681 == CODE_FOR_nothing))
6682 break;
ebfd146a 6683
9771b263 6684 interm_types->quick_push (intermediate_type);
4a00c761
JJ
6685 (*multi_step_cvt)++;
6686
6687 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6688 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6689 return true;
6690
6691 prev_type = intermediate_type;
6692 prev_mode = intermediate_mode;
ebfd146a
IR
6693 }
6694
9771b263 6695 interm_types->release ();
4a00c761 6696 return false;
ebfd146a
IR
6697}
6698
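
To make the multi-step contract concrete, a hedged query sketch (not from
the original source; the V16QI/V8HI/V4SI shapes assume a typical 128-bit
target and are illustrative only):

static bool
widening_query_sketch (gimple stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int steps;
  vec<tree> interm = vNULL;
  bool ok;

  ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
				       vectype_in, &code1, &code2,
				       &steps, &interm);
  /* For V16QI -> V4SI one would expect code1/code2 ==
     VEC_UNPACK_LO/HI_EXPR, steps == 1 and interm == { V8HI },
     matching the char->short->int example in the comment above.  */
  if (ok)
    interm.release ();
  return ok;
}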
6699
6700/* Function supportable_narrowing_operation
6701
b8698a0f
L
6702 Check whether an operation represented by the code CODE is a
6703 narrowing operation that is supported by the target platform in
b690cc0f
RG
6704 vector form (i.e., when operating on arguments of type VECTYPE_IN
6705 and producing a result of type VECTYPE_OUT).
b8698a0f 6706
ebfd146a 6707 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 6708 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
6709 the target platform directly via vector tree-codes.
6710
6711 Output:
b8698a0f
L
6712 - CODE1 is the code of a vector operation to be used when
6713 vectorizing the operation, if available.
ebfd146a
IR
6714 - MULTI_STEP_CVT determines the number of required intermediate steps in
6715 case of multi-step conversion (like int->short->char - in that case
6716 MULTI_STEP_CVT will be 1).
6717 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 6718 narrowing operation (short in the above example). */
ebfd146a
IR
6719
6720bool
6721supportable_narrowing_operation (enum tree_code code,
b690cc0f 6722 tree vectype_out, tree vectype_in,
ebfd146a 6723 enum tree_code *code1, int *multi_step_cvt,
9771b263 6724 vec<tree> *interm_types)
ebfd146a
IR
6725{
6726 enum machine_mode vec_mode;
6727 enum insn_code icode1;
6728 optab optab1, interm_optab;
b690cc0f
RG
6729 tree vectype = vectype_in;
6730 tree narrow_vectype = vectype_out;
ebfd146a 6731 enum tree_code c1;
4a00c761
JJ
6732 tree intermediate_type;
6733 enum machine_mode intermediate_mode, prev_mode;
ebfd146a 6734 int i;
4a00c761 6735 bool uns;
ebfd146a 6736
4a00c761 6737 *multi_step_cvt = 0;
ebfd146a
IR
6738 switch (code)
6739 {
6740 CASE_CONVERT:
6741 c1 = VEC_PACK_TRUNC_EXPR;
6742 break;
6743
6744 case FIX_TRUNC_EXPR:
6745 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6746 break;
6747
6748 case FLOAT_EXPR:
6749 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6750 tree code and optabs used for computing the operation. */
6751 return false;
6752
6753 default:
6754 gcc_unreachable ();
6755 }
6756
6757 if (code == FIX_TRUNC_EXPR)
6758 /* The signedness is determined from output operand. */
b690cc0f 6759 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
6760 else
6761 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6762
6763 if (!optab1)
6764 return false;
6765
6766 vec_mode = TYPE_MODE (vectype);
947131ba 6767 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6768 return false;
6769
4a00c761
JJ
6770 *code1 = c1;
6771
6772 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6773 return true;
6774
ebfd146a
IR
6775 /* Check if it's a multi-step conversion that can be done using intermediate
6776 types. */
4a00c761
JJ
6777 prev_mode = vec_mode;
6778 if (code == FIX_TRUNC_EXPR)
6779 uns = TYPE_UNSIGNED (vectype_out);
6780 else
6781 uns = TYPE_UNSIGNED (vectype);
6782
6783 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6784 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6785 costly than signed. */
6786 if (code == FIX_TRUNC_EXPR && uns)
6787 {
6788 enum insn_code icode2;
6789
6790 intermediate_type
6791 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6792 interm_optab
6793 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 6794 if (interm_optab != unknown_optab
4a00c761
JJ
6795 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6796 && insn_data[icode1].operand[0].mode
6797 == insn_data[icode2].operand[0].mode)
6798 {
6799 uns = false;
6800 optab1 = interm_optab;
6801 icode1 = icode2;
6802 }
6803 }
ebfd146a 6804
4a00c761
JJ
6805 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6806 intermediate steps in the narrowing sequence.  We try
6807 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 6808 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
6809 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6810 {
6811 intermediate_mode = insn_data[icode1].operand[0].mode;
6812 intermediate_type
6813 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6814 interm_optab
6815 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6816 optab_default);
6817 if (!interm_optab
6818 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6819 || insn_data[icode1].operand[0].mode != intermediate_mode
6820 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6821 == CODE_FOR_nothing))
6822 break;
6823
9771b263 6824 interm_types->quick_push (intermediate_type);
4a00c761
JJ
6825 (*multi_step_cvt)++;
6826
6827 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6828 return true;
6829
6830 prev_mode = intermediate_mode;
6831 optab1 = interm_optab;
ebfd146a
IR
6832 }
6833
9771b263 6834 interm_types->release ();
4a00c761 6835 return false;
ebfd146a 6836}
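
And the matching narrowing sketch (again not from the original source, with
the same caveat that the vector shapes are illustrative):

static bool
narrowing_query_sketch (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int steps;
  vec<tree> interm = vNULL;
  bool ok;

  ok = supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code1, &steps, &interm);
  /* For V4SI -> V16QI expect code1 == VEC_PACK_TRUNC_EXPR, steps == 1
     and interm == { V8HI }: int -> short, then short -> char.  */
  if (ok)
    interm.release ();
  return ok;
}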