/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
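
/* A minimal usage sketch (illustrative, not part of the original file):
   during analysis a caller typically accumulates body costs into a
   local cost vector and only later feeds them to the target model,

	stmt_vector_for_cost body_cost_vec;
	body_cost_vec.create (2);
	unsigned estimate
	  = record_stmt_cost (&body_cost_vec, 1, vector_stmt,
			      stmt_info, 0, vect_body);

   whereas passing a NULL cost vector hands the cost directly to the
   target via add_stmt_cost.  */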

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
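
/* Illustrative sketch (assumed usage, not from the original sources):
   for a store-lanes group of two vectors the helpers above combine as

	tree array = create_vector_array (vectype, 2);
	write_vector_array (stmt, gsi, vec0, array, 0);
	write_vector_array (stmt, gsi, vec1, array, 1);
	tree data_ref = create_array_ref (TREE_TYPE (array), ptr, first_dr);

   after which a single target store-lanes instruction stores the whole
   array through DATA_REF.  */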

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in which case the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is outside of a pattern; if LHS has other uses that
	     are pattern uses, we should mark the stmt itself, and not the
	     pattern stmt.  */
	  if (TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
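
/* Illustrative example (not from the original sources): in

	for (i = 0; i < n; i++)
	  {
	    a[i] = b[i] + 1;	<-- relevant: has a vdef (alters memory)
	    s = s + b[i];	<-- live: s is used after the loop
	  }
	... = s;

   the store is marked vect_used_in_scope because of its vdef, and the
   summation gets *live_p = true because its def escapes through the
   loop exit phi.  */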


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is in one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
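
/* For example (illustrative): given the store 'a[i_1] = x_2', USE == x_2
   is a real (stored) value and the function returns true, while
   USE == i_1 only feeds the address computation and yields false.  For
   a load 'x_2 = a[i_1]' every use is part of the data reference, so the
   function returns false for all of them.  */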


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.	 j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
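
/* For instance (illustrative): costing 'c = a + b' with NCOPIES == 2 and
   both operands defined inside the loop records two vector_stmt body
   costs and no prologue cost; if 'b' were loop-invariant
   (vect_external_def), one extra vector_stmt prologue cost for the
   broadcast would be recorded as well.  */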


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
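
/* Worked example: a two-step promotion (PWR == 1) is costed as
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   operations in the body, while the corresponding two-step demotion is
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, since promotions split
   vectors where demotions combine them.  */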

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
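
/* E.g. an interleaved store group of size 4 with NCOPIES == 1 and no
   store-lanes support is charged nstmts = 1 * exact_log2 (4) * 4 = 8
   vec_perm operations on top of the stores themselves.  */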


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
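
/* E.g. a strided load of a four-element vector with NCOPIES == 1 is
   costed as 4 scalar_load operations plus one vec_construct to build
   the vector, rather than as a single vector_load.  */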


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
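
/* E.g. for dr_explicit_realign_optimized with ADD_REALIGN_COST set, the
   one-time setup (the address load, the initial load, and optionally
   the mask-for-load builtin) is charged to the prologue, while every
   copy in the body pays one vector_load plus one vec_perm.  */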

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
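
/* Usage sketch (illustrative): materializing the invariant 5 as a
   four-element integer vector,

	vect_init_vector (stmt, build_int_cst (integer_type_node, 5),
			  vectype, NULL);

   emits an init stmt of the form 'vect_cst_.N = { 5, 5, 5, 5 }' in the
   loop preheader and returns the new SSA name; VECTYPE here stands for
   the V4SI vector type.  */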


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def = ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, " def_stmt = ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop.  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of a reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
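
/* Typical caller pattern (an illustrative sketch): defs for copy 0 of a
   stmt come from vect_get_vec_defs, and defs for the following copies from
   vect_get_vec_defs_for_stmt_copy:

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                              slp_node, -1);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
         (build the vector stmt(s) of copy J from the collected defs)
       }

   This is the shape used by vectorizable_assignment and
   vectorizable_conversion below.  */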


/* Function vect_finish_stmt_generation.

   Insert the new vector stmt VEC_STMT before the scalar stmt pointed to by
   GSI, create its stmt_vec_info, and give it the location of STMT.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
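
/* The usual create-and-insert sequence built around this function
   (an illustrative sketch):

     new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
     new_temp = make_ssa_name (vec_dest, new_stmt);
     gimple_assign_set_lhs (new_stmt, new_temp);
     vect_finish_stmt_generation (stmt, new_stmt, gsi);

   i.e. the new vector stmt gets a fresh SSA lhs and is then inserted
   before the scalar stmt STMT that it replaces.  */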

/* Checks if CALL can be vectorized with result type VECTYPE_OUT and
   argument type VECTYPE_IN.  Returns a function declaration if the target
   has a vectorized version of the function, or NULL_TREE if the function
   cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
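
/* For example (a hypothetical target): if the target hook maps
   BUILT_IN_SQRT to a builtin operating on V2DF, then for

     double y = sqrt (x);

   vectorizable_function returns the decl of that V2DF builtin when both
   VECTYPE_OUT and VECTYPE_IN are V2DF, and NULL_TREE otherwise.  */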

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments,
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_call ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest, NULL);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
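
/* Worked example (illustrative, for a hypothetical target): with V4SF
   vectors and a target builtin for copysignf, the loop body

     a[i] = copysignf (b[i], c[i]);

   takes the NONE path above (nunits_in == nunits_out), and each scalar
   call becomes a single vector call

     vect_a = <target-copysign-v4sf> (vect_b, vect_c);

   where <target-copysign-v4sf> stands for whatever decl the target's
   builtin_vectorized_function hook returned.  */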


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code is CODE, whose number of arguments is
   OP_TYPE, and whose result variable is VEC_DEST; its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get the first vector operand.  All the vector operands except the very
     first one (which comes from the scalar operand) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get the second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
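
/* Illustrative sketch: for a two-step conversion (MULTI_STEP_CVT == 1) the
   recursion above pushes two defs per level, so

     tree oprnd = op0;
     vec_oprnds.create (4);
     vect_get_loop_based_defs (&oprnd, stmt, dt, &vec_oprnds, 1);

   leaves VEC_OPRNDS holding four defs d0..d3, where d0 is created from the
   scalar operand and each following def is the stmt-copy of its
   predecessor.  */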


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
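
/* Example (illustrative): demoting V4SI to V16QI in two steps.  Starting
   from four V4SI operands in VEC_OPRNDS, the first level emits two
   VEC_PACK_TRUNC_EXPR stmts producing V8HI vectors, and the recursive call
   packs those into the final V16QI result:

     v8hi_0  = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
     v8hi_1  = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;
     v16qi_0 = VEC_PACK_TRUNC_EXPR <v8hi_0, v8hi_1>;

   halving the number of operands at each level, as noted above.  */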


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of the promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
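
/* Example (illustrative): widening V8HI to V4SI.  Each V8HI operand yields
   two V4SI results via the lo/hi pair of codes returned by
   supportable_widening_operation:

     v4si_lo = VEC_UNPACK_LO_EXPR <v8hi_0>;
     v4si_hi = VEC_UNPACK_HI_EXPR <v8hi_0>;

   so VEC_OPRNDS0 doubles in length on each invocation.  */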


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
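
/* Worked example (illustrative): on a target with V8HI and V4SI vectors,
   the WIDEN path above turns the conversion stmt

     int_val = (int) short_val;

   into two vector stmts per V8HI input (nunits_in == 8, nunits_out == 4):

     vect_lo = VEC_UNPACK_LO_EXPR <vect_short>;
     vect_hi = VEC_UNPACK_HI_EXPR <vect_short>;

   For a multi-step case such as char -> int, an intermediate short vector
   type from INTERM_TYPES is used between the two levels.  */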


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
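
/* Example (illustrative): a sign-changing copy such as

     unsigned_val = (unsigned int) signed_val;

   keeps both the element count and the vector size, so each vector def is
   simply wrapped in a VIEW_CONVERT_EXPR to the unsigned vector type and
   assigned to the new destination.  */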


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
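
/* Usage sketch (illustrative): a pattern recognizer can ask up front
   whether a shift it is about to synthesize is vectorizable at all, e.g.

     if (!vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       return NULL;

   before committing to a pattern that contains such a shift.  */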
3097
3098
9dc3f7de
IR
3099/* Function vectorizable_shift.
3100
3101 Check if STMT performs a shift operation that can be vectorized.
3102 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3103 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3104 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3105
3106static bool
3107vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3108 gimple *vec_stmt, slp_tree slp_node)
3109{
3110 tree vec_dest;
3111 tree scalar_dest;
3112 tree op0, op1 = NULL;
3113 tree vec_oprnd1 = NULL_TREE;
3114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3115 tree vectype;
3116 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3117 enum tree_code code;
3118 enum machine_mode vec_mode;
3119 tree new_temp;
3120 optab optab;
3121 int icode;
3122 enum machine_mode optab_op2_mode;
3123 tree def;
3124 gimple def_stmt;
3125 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3126 gimple new_stmt = NULL;
3127 stmt_vec_info prev_stmt_info;
3128 int nunits_in;
3129 int nunits_out;
3130 tree vectype_out;
cede2577 3131 tree op1_vectype;
9dc3f7de
IR
3132 int ncopies;
3133 int j, i;
6e1aa848
DN
3134 vec<tree> vec_oprnds0 = vNULL;
3135 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
3136 tree vop0, vop1;
3137 unsigned int k;
49eab32e 3138 bool scalar_shift_arg = true;
9dc3f7de
IR
3139 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3140 int vf;
3141
3142 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3143 return false;
3144
3145 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3146 return false;
3147
3148 /* Is STMT a vectorizable binary/unary operation? */
3149 if (!is_gimple_assign (stmt))
3150 return false;
3151
3152 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3153 return false;
3154
3155 code = gimple_assign_rhs_code (stmt);
3156
3157 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3158 || code == RROTATE_EXPR))
3159 return false;
3160
3161 scalar_dest = gimple_assign_lhs (stmt);
3162 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
3163 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3164 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3165 {
73fbfcad 3166 if (dump_enabled_p ())
78c60e3d 3167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3168 "bit-precision shifts not supported.\n");
7b7b1813
RG
3169 return false;
3170 }
9dc3f7de
IR
3171
3172 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3173 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
3174 &def_stmt, &def, &dt[0], &vectype))
3175 {
73fbfcad 3176 if (dump_enabled_p ())
78c60e3d 3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3178 "use not simple.\n");
9dc3f7de
IR
3179 return false;
3180 }
3181 /* If op0 is an external or constant def use a vector type with
3182 the same size as the output vector type. */
3183 if (!vectype)
3184 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3185 if (vec_stmt)
3186 gcc_assert (vectype);
3187 if (!vectype)
3188 {
73fbfcad 3189 if (dump_enabled_p ())
78c60e3d 3190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3191 "no vectype for scalar type\n");
9dc3f7de
IR
3192 return false;
3193 }
3194
3195 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3196 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3197 if (nunits_out != nunits_in)
3198 return false;
3199
3200 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3201 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3202 &def, &dt[1], &op1_vectype))
9dc3f7de 3203 {
73fbfcad 3204 if (dump_enabled_p ())
78c60e3d 3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3206 "use not simple.\n");
9dc3f7de
IR
3207 return false;
3208 }
3209
3210 if (loop_vinfo)
3211 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3212 else
3213 vf = 1;
3214
3215 /* Multiple types in SLP are handled by creating the appropriate number of
3216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3217 case of SLP. */
437f4a00 3218 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
3219 ncopies = 1;
3220 else
3221 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3222
3223 gcc_assert (ncopies >= 1);
3224
3225 /* Determine whether the shift amount is a vector, or scalar. If the
3226 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3227
49eab32e
JJ
3228 if (dt[1] == vect_internal_def && !slp_node)
3229 scalar_shift_arg = false;
3230 else if (dt[1] == vect_constant_def
3231 || dt[1] == vect_external_def
3232 || dt[1] == vect_internal_def)
3233 {
3234 /* In SLP, we need to check whether the shift count is the same for
3235 all the statements; in loops, a constant or invariant count is
3236 always a scalar shift. */
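 /* E.g., an SLP group built from

 a[0] = b[0] << 2;
 a[1] = b[1] << 3;

 has two different constant shift counts, so the count cannot be
 used as a single scalar operand and SCALAR_SHIFT_ARG is cleared
 below. */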
3237 if (slp_node)
3238 {
9771b263 3239 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
3240 gimple slpstmt;
3241
9771b263 3242 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
3243 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3244 scalar_shift_arg = false;
3245 }
3246 }
3247 else
3248 {
73fbfcad 3249 if (dump_enabled_p ())
78c60e3d 3250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3251 "operand mode requires invariant argument.\n");
49eab32e
JJ
3252 return false;
3253 }
3254
9dc3f7de 3255 /* Vector shifted by vector. */
49eab32e 3256 if (!scalar_shift_arg)
9dc3f7de
IR
3257 {
3258 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 3259 if (dump_enabled_p ())
78c60e3d 3260 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3261 "vector/vector shift/rotate found.\n");
78c60e3d 3262
aa948027
JJ
3263 if (!op1_vectype)
3264 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3265 if (op1_vectype == NULL_TREE
3266 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 3267 {
73fbfcad 3268 if (dump_enabled_p ())
78c60e3d
SS
3269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3270 "unusable type for last operand in"
e645e942 3271 " vector/vector shift/rotate.\n");
cede2577
JJ
3272 return false;
3273 }
9dc3f7de
IR
3274 }
3275 /* See if the machine has a vector shifted by scalar insn and if not
3276 then see if it has a vector shifted by vector insn. */
49eab32e 3277 else
9dc3f7de
IR
3278 {
3279 optab = optab_for_tree_code (code, vectype, optab_scalar);
3280 if (optab
3281 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3282 {
73fbfcad 3283 if (dump_enabled_p ())
78c60e3d 3284 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3285 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
3286 }
3287 else
3288 {
3289 optab = optab_for_tree_code (code, vectype, optab_vector);
3290 if (optab
3291 && (optab_handler (optab, TYPE_MODE (vectype))
3292 != CODE_FOR_nothing))
3293 {
49eab32e
JJ
3294 scalar_shift_arg = false;
3295
73fbfcad 3296 if (dump_enabled_p ())
78c60e3d 3297 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3298 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
3299
3300 /* Unlike the other binary operators, shifts/rotates have
3301 an int rhs rather than one of the same type as the lhs,
3302 so make sure the scalar is the right type if we are
aa948027 3303 dealing with vectors of long long/long/short/char. */
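 /* E.g., with vectors of long long, a source loop like

 a[i] = b[i] << k;

 usually declares K as int; the shift amount then has a narrower
 type than the vector elements, so K is converted to long long
 here before it is broadcast into a vector operand. */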
9dc3f7de
IR
3304 if (dt[1] == vect_constant_def)
3305 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
3306 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3307 TREE_TYPE (op1)))
3308 {
3309 if (slp_node
3310 && TYPE_MODE (TREE_TYPE (vectype))
3311 != TYPE_MODE (TREE_TYPE (op1)))
3312 {
73fbfcad 3313 if (dump_enabled_p ())
78c60e3d
SS
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3315 "unusable type for last operand in"
e645e942 3316 " vector/vector shift/rotate.\n");
aa948027
JJ
3317 return false;
3318 }
3319 if (vec_stmt && !slp_node)
3320 {
3321 op1 = fold_convert (TREE_TYPE (vectype), op1);
3322 op1 = vect_init_vector (stmt, op1,
3323 TREE_TYPE (vectype), NULL);
3324 }
3325 }
9dc3f7de
IR
3326 }
3327 }
3328 }
9dc3f7de
IR
3329
3330 /* Supportable by target? */
3331 if (!optab)
3332 {
73fbfcad 3333 if (dump_enabled_p ())
78c60e3d 3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3335 "no optab.\n");
9dc3f7de
IR
3336 return false;
3337 }
3338 vec_mode = TYPE_MODE (vectype);
3339 icode = (int) optab_handler (optab, vec_mode);
3340 if (icode == CODE_FOR_nothing)
3341 {
73fbfcad 3342 if (dump_enabled_p ())
78c60e3d 3343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3344 "op not supported by target.\n");
9dc3f7de
IR
3345 /* Check only during analysis. */
3346 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3347 || (vf < vect_min_worthwhile_factor (code)
3348 && !vec_stmt))
3349 return false;
73fbfcad 3350 if (dump_enabled_p ())
e645e942
TJ
3351 dump_printf_loc (MSG_NOTE, vect_location,
3352 "proceeding using word mode.\n");
9dc3f7de
IR
3353 }
3354
3355 /* Worthwhile without SIMD support? Check only during analysis. */
3356 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3357 && vf < vect_min_worthwhile_factor (code)
3358 && !vec_stmt)
3359 {
73fbfcad 3360 if (dump_enabled_p ())
78c60e3d 3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3362 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
3363 return false;
3364 }
3365
3366 if (!vec_stmt) /* transformation not required. */
3367 {
3368 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 3369 if (dump_enabled_p ())
e645e942
TJ
3370 dump_printf_loc (MSG_NOTE, vect_location,
3371 "=== vectorizable_shift ===\n");
c3e7ee41 3372 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
9dc3f7de
IR
3373 return true;
3374 }
3375
3376 /** Transform. **/
3377
73fbfcad 3378 if (dump_enabled_p ())
78c60e3d 3379 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3380 "transform binary/unary operation.\n");
9dc3f7de
IR
3381
3382 /* Handle def. */
3383 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3384
9dc3f7de
IR
3385 prev_stmt_info = NULL;
3386 for (j = 0; j < ncopies; j++)
3387 {
3388 /* Handle uses. */
3389 if (j == 0)
3390 {
3391 if (scalar_shift_arg)
3392 {
3393 /* Vector shl and shr insn patterns can be defined with scalar
3394 operand 2 (shift operand). In this case, use constant or loop
3395 invariant op1 directly, without extending it to vector mode
3396 first. */
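 /* E.g., some targets provide shift patterns whose count operand
 is a scalar register or immediate (x86's psllw-style patterns
 are one such case), so a whole vector can be shifted by the
 unextended OP1; the operand[2] mode below tells us whether the
 target's pattern is of that form. */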
3397 optab_op2_mode = insn_data[icode].operand[2].mode;
3398 if (!VECTOR_MODE_P (optab_op2_mode))
3399 {
73fbfcad 3400 if (dump_enabled_p ())
78c60e3d 3401 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3402 "operand 1 using scalar mode.\n");
9dc3f7de 3403 vec_oprnd1 = op1;
8930f723 3404 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 3405 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
3406 if (slp_node)
3407 {
3408 /* Store vec_oprnd1 for every vector stmt to be created
3409 for SLP_NODE. We check during the analysis that all
3410 the shift arguments are the same.
3411 TODO: Allow different constants for different vector
3412 stmts generated for an SLP instance. */
3413 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 3414 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
3415 }
3416 }
3417 }
3418
3419 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3420 (a special case for certain kinds of vector shifts); otherwise,
3421 operand 1 should be of a vector type (the usual case). */
3422 if (vec_oprnd1)
3423 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 3424 slp_node, -1);
9dc3f7de
IR
3425 else
3426 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 3427 slp_node, -1);
9dc3f7de
IR
3428 }
3429 else
3430 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3431
3432 /* Arguments are ready. Create the new vector stmt. */
9771b263 3433 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 3434 {
9771b263 3435 vop1 = vec_oprnds1[i];
9dc3f7de
IR
3436 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3437 new_temp = make_ssa_name (vec_dest, new_stmt);
3438 gimple_assign_set_lhs (new_stmt, new_temp);
3439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3440 if (slp_node)
9771b263 3441 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
3442 }
3443
3444 if (slp_node)
3445 continue;
3446
3447 if (j == 0)
3448 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3449 else
3450 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3451 prev_stmt_info = vinfo_for_stmt (new_stmt);
3452 }
3453
9771b263
DN
3454 vec_oprnds0.release ();
3455 vec_oprnds1.release ();
9dc3f7de
IR
3456
3457 return true;
3458}
3459
3460
5deb57cb
JJ
3461static tree permute_vec_elements (tree, tree, tree, gimple,
3462 gimple_stmt_iterator *);
3463
3464
ebfd146a
IR
3465/* Function vectorizable_operation.
3466
16949072
RG
3467 Check if STMT performs a binary, unary or ternary operation that can
3468 be vectorized.
b8698a0f 3469 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3470 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3471 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3472
3473static bool
3474vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3475 gimple *vec_stmt, slp_tree slp_node)
3476{
00f07b86 3477 tree vec_dest;
ebfd146a 3478 tree scalar_dest;
16949072 3479 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 3480 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 3481 tree vectype;
ebfd146a
IR
3482 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3483 enum tree_code code;
3484 enum machine_mode vec_mode;
3485 tree new_temp;
3486 int op_type;
00f07b86 3487 optab optab;
ebfd146a 3488 int icode;
ebfd146a
IR
3489 tree def;
3490 gimple def_stmt;
16949072
RG
3491 enum vect_def_type dt[3]
3492 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
3493 gimple new_stmt = NULL;
3494 stmt_vec_info prev_stmt_info;
b690cc0f 3495 int nunits_in;
ebfd146a
IR
3496 int nunits_out;
3497 tree vectype_out;
3498 int ncopies;
3499 int j, i;
6e1aa848
DN
3500 vec<tree> vec_oprnds0 = vNULL;
3501 vec<tree> vec_oprnds1 = vNULL;
3502 vec<tree> vec_oprnds2 = vNULL;
16949072 3503 tree vop0, vop1, vop2;
a70d6342
IR
3504 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3505 int vf;
3506
a70d6342 3507 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3508 return false;
3509
8644a673 3510 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3511 return false;
3512
3513 /* Is STMT a vectorizable binary/unary operation? */
3514 if (!is_gimple_assign (stmt))
3515 return false;
3516
3517 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3518 return false;
3519
ebfd146a
IR
3520 code = gimple_assign_rhs_code (stmt);
3521
3522 /* For pointer addition, we should use the normal plus for
3523 the vector addition. */
3524 if (code == POINTER_PLUS_EXPR)
3525 code = PLUS_EXPR;
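 /* E.g., for a statement like

 q[i] = p + i;

 the scalar code uses POINTER_PLUS_EXPR, but the vectorized form
 is plain element-wise addition of pointer-sized values, hence
 PLUS_EXPR. */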
3526
3527 /* Support only unary or binary operations. */
3528 op_type = TREE_CODE_LENGTH (code);
16949072 3529 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 3530 {
73fbfcad 3531 if (dump_enabled_p ())
78c60e3d 3532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3533 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 3534 op_type);
ebfd146a
IR
3535 return false;
3536 }
3537
b690cc0f
RG
3538 scalar_dest = gimple_assign_lhs (stmt);
3539 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3540
7b7b1813
RG
3541 /* Most operations cannot handle bit-precision types without extra
3542 truncations. */
3543 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3544 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3545 /* Exception are bitwise binary operations. */
3546 && code != BIT_IOR_EXPR
3547 && code != BIT_XOR_EXPR
3548 && code != BIT_AND_EXPR)
3549 {
73fbfcad 3550 if (dump_enabled_p ())
78c60e3d 3551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3552 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
3553 return false;
3554 }
3555
ebfd146a 3556 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3557 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 3558 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 3559 {
73fbfcad 3560 if (dump_enabled_p ())
78c60e3d 3561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3562 "use not simple.\n");
ebfd146a
IR
3563 return false;
3564 }
b690cc0f
RG
3565 /* If op0 is an external or constant def use a vector type with
3566 the same size as the output vector type. */
3567 if (!vectype)
3568 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3569 if (vec_stmt)
3570 gcc_assert (vectype);
3571 if (!vectype)
3572 {
73fbfcad 3573 if (dump_enabled_p ())
7d8930a0 3574 {
78c60e3d
SS
3575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3576 "no vectype for scalar type ");
3577 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3578 TREE_TYPE (op0));
e645e942 3579 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3580 }
3581
3582 return false;
3583 }
b690cc0f
RG
3584
3585 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3586 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3587 if (nunits_out != nunits_in)
3588 return false;
ebfd146a 3589
16949072 3590 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
3591 {
3592 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3593 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3594 &def, &dt[1]))
ebfd146a 3595 {
73fbfcad 3596 if (dump_enabled_p ())
78c60e3d 3597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3598 "use not simple.\n");
ebfd146a
IR
3599 return false;
3600 }
3601 }
16949072
RG
3602 if (op_type == ternary_op)
3603 {
3604 op2 = gimple_assign_rhs3 (stmt);
24ee1384
IR
3605 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3606 &def, &dt[2]))
16949072 3607 {
73fbfcad 3608 if (dump_enabled_p ())
78c60e3d 3609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3610 "use not simple.\n");
16949072
RG
3611 return false;
3612 }
3613 }
ebfd146a 3614
b690cc0f
RG
3615 if (loop_vinfo)
3616 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3617 else
3618 vf = 1;
3619
3620 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3621 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 3622 case of SLP. */
437f4a00 3623 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
3624 ncopies = 1;
3625 else
3626 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3627
3628 gcc_assert (ncopies >= 1);
3629
9dc3f7de 3630 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
3631 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3632 || code == RROTATE_EXPR)
9dc3f7de 3633 return false;
ebfd146a 3634
ebfd146a 3635 /* Supportable by target? */
00f07b86
RH
3636
3637 vec_mode = TYPE_MODE (vectype);
3638 if (code == MULT_HIGHPART_EXPR)
ebfd146a 3639 {
00f07b86 3640 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 3641 icode = LAST_INSN_CODE;
00f07b86
RH
3642 else
3643 icode = CODE_FOR_nothing;
ebfd146a 3644 }
00f07b86
RH
3645 else
3646 {
3647 optab = optab_for_tree_code (code, vectype, optab_default);
3648 if (!optab)
5deb57cb 3649 {
73fbfcad 3650 if (dump_enabled_p ())
78c60e3d 3651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3652 "no optab.\n");
00f07b86 3653 return false;
5deb57cb 3654 }
00f07b86 3655 icode = (int) optab_handler (optab, vec_mode);
5deb57cb
JJ
3656 }
3657
ebfd146a
IR
3658 if (icode == CODE_FOR_nothing)
3659 {
73fbfcad 3660 if (dump_enabled_p ())
78c60e3d 3661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3662 "op not supported by target.\n");
ebfd146a
IR
3663 /* Check only during analysis. */
3664 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 3665 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 3666 return false;
73fbfcad 3667 if (dump_enabled_p ())
e645e942
TJ
3668 dump_printf_loc (MSG_NOTE, vect_location,
3669 "proceeding using word mode.\n");
383d9c83
IR
3670 }
3671
4a00c761 3672 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
3673 if (!VECTOR_MODE_P (vec_mode)
3674 && !vec_stmt
3675 && vf < vect_min_worthwhile_factor (code))
7d8930a0 3676 {
73fbfcad 3677 if (dump_enabled_p ())
78c60e3d 3678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3679 "not worthwhile without SIMD support.\n");
e34842c6 3680 return false;
7d8930a0 3681 }
ebfd146a 3682
ebfd146a
IR
3683 if (!vec_stmt) /* transformation not required. */
3684 {
4a00c761 3685 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 3686 if (dump_enabled_p ())
78c60e3d 3687 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3688 "=== vectorizable_operation ===\n");
c3e7ee41 3689 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
3690 return true;
3691 }
3692
3693 /** Transform. **/
3694
73fbfcad 3695 if (dump_enabled_p ())
78c60e3d 3696 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3697 "transform binary/unary operation.\n");
383d9c83 3698
ebfd146a 3699 /* Handle def. */
00f07b86 3700 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 3701
ebfd146a
IR
3702 /* In case the vectorization factor (VF) is bigger than the number
3703 of elements that we can fit in a vectype (nunits), we have to generate
3704 more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
3705 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3706 from one copy of the vector stmt to the next, in the field
3707 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3708 stages to find the correct vector defs to be used when vectorizing
3709 stmts that use the defs of the current stmt. The example below
3710 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3711 we need to create 4 vectorized stmts):
3712
3713 before vectorization:
3714 RELATED_STMT VEC_STMT
3715 S1: x = memref - -
3716 S2: z = x + 1 - -
3717
3718 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3719 there):
3720 RELATED_STMT VEC_STMT
3721 VS1_0: vx0 = memref0 VS1_1 -
3722 VS1_1: vx1 = memref1 VS1_2 -
3723 VS1_2: vx2 = memref2 VS1_3 -
3724 VS1_3: vx3 = memref3 - -
3725 S1: x = load - VS1_0
3726 S2: z = x + 1 - -
3727
3728 step2: vectorize stmt S2 (done here):
3729 To vectorize stmt S2 we first need to find the relevant vector
3730 def for the first operand 'x'. This is, as usual, obtained from
3731 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3732 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3733 relevant vector def 'vx0'. Having found 'vx0' we can generate
3734 the vector stmt VS2_0, and as usual, record it in the
3735 STMT_VINFO_VEC_STMT of stmt S2.
3736 When creating the second copy (VS2_1), we obtain the relevant vector
3737 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3738 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3739 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3740 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3741 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3742 chain of stmts and pointers:
3743 RELATED_STMT VEC_STMT
3744 VS1_0: vx0 = memref0 VS1_1 -
3745 VS1_1: vx1 = memref1 VS1_2 -
3746 VS1_2: vx2 = memref2 VS1_3 -
3747 VS1_3: vx3 = memref3 - -
3748 S1: x = load - VS1_0
3749 VS2_0: vz0 = vx0 + v1 VS2_1 -
3750 VS2_1: vz1 = vx1 + v1 VS2_2 -
3751 VS2_2: vz2 = vx2 + v1 VS2_3 -
3752 VS2_3: vz3 = vx3 + v1 - -
3753 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
3754
3755 prev_stmt_info = NULL;
3756 for (j = 0; j < ncopies; j++)
3757 {
3758 /* Handle uses. */
3759 if (j == 0)
4a00c761
JJ
3760 {
3761 if (op_type == binary_op || op_type == ternary_op)
3762 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3763 slp_node, -1);
3764 else
3765 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3766 slp_node, -1);
3767 if (op_type == ternary_op)
36ba4aae 3768 {
9771b263
DN
3769 vec_oprnds2.create (1);
3770 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3771 stmt,
3772 NULL));
36ba4aae 3773 }
4a00c761 3774 }
ebfd146a 3775 else
4a00c761
JJ
3776 {
3777 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3778 if (op_type == ternary_op)
3779 {
9771b263
DN
3780 tree vec_oprnd = vec_oprnds2.pop ();
3781 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3782 vec_oprnd));
4a00c761
JJ
3783 }
3784 }
3785
3786 /* Arguments are ready. Create the new vector stmt. */
9771b263 3787 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 3788 {
4a00c761 3789 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 3790 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 3791 vop2 = ((op_type == ternary_op)
9771b263 3792 ? vec_oprnds2[i] : NULL_TREE);
73804b12
RG
3793 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3794 vop0, vop1, vop2);
4a00c761
JJ
3795 new_temp = make_ssa_name (vec_dest, new_stmt);
3796 gimple_assign_set_lhs (new_stmt, new_temp);
3797 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3798 if (slp_node)
9771b263 3799 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
3800 }
3801
4a00c761
JJ
3802 if (slp_node)
3803 continue;
3804
3805 if (j == 0)
3806 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3807 else
3808 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3809 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
3810 }
3811
9771b263
DN
3812 vec_oprnds0.release ();
3813 vec_oprnds1.release ();
3814 vec_oprnds2.release ();
ebfd146a 3815
ebfd146a
IR
3816 return true;
3817}
3818
c716e67f
XDL
3819/* A helper function to ensure data reference DR's base alignment
3820 for STMT_INFO. */
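/* E.g., if vectorizing with V4SF requires 16-byte alignment but the
 base array was laid out with only 4-byte alignment, raising the
 base declaration's DECL_ALIGN to TYPE_ALIGN of the vector type (as
 done below) lets the aligned vector access path be used. */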
3821
3822static void
3823ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3824{
3825 if (!dr->aux)
3826 return;
3827
3828 if (((dataref_aux *)dr->aux)->base_misaligned)
3829 {
3830 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3831 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3832
3833 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3834 DECL_USER_ALIGN (base_decl) = 1;
3835 ((dataref_aux *)dr->aux)->base_misaligned = false;
3836 }
3837}
3838
ebfd146a
IR
3839
3840/* Function vectorizable_store.
3841
b8698a0f
L
3842 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3843 can be vectorized.
3844 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3845 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3846 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3847
3848static bool
3849vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 3850 slp_tree slp_node)
ebfd146a
IR
3851{
3852 tree scalar_dest;
3853 tree data_ref;
3854 tree op;
3855 tree vec_oprnd = NULL_TREE;
3856 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3857 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3858 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3859 tree elem_type;
ebfd146a 3860 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3861 struct loop *loop = NULL;
ebfd146a
IR
3862 enum machine_mode vec_mode;
3863 tree dummy;
3864 enum dr_alignment_support alignment_support_scheme;
3865 tree def;
3866 gimple def_stmt;
3867 enum vect_def_type dt;
3868 stmt_vec_info prev_stmt_info = NULL;
3869 tree dataref_ptr = NULL_TREE;
74bf76ed 3870 tree dataref_offset = NULL_TREE;
fef4d2b3 3871 gimple ptr_incr = NULL;
ebfd146a
IR
3872 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3873 int ncopies;
3874 int j;
3875 gimple next_stmt, first_stmt = NULL;
0d0293ac 3876 bool grouped_store = false;
272c6793 3877 bool store_lanes_p = false;
ebfd146a 3878 unsigned int group_size, i;
6e1aa848
DN
3879 vec<tree> dr_chain = vNULL;
3880 vec<tree> oprnds = vNULL;
3881 vec<tree> result_chain = vNULL;
ebfd146a 3882 bool inv_p;
6e1aa848 3883 vec<tree> vec_oprnds = vNULL;
ebfd146a 3884 bool slp = (slp_node != NULL);
ebfd146a 3885 unsigned int vec_num;
a70d6342 3886 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3887 tree aggr_type;
a70d6342
IR
3888
3889 if (loop_vinfo)
3890 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
3891
3892 /* Multiple types in SLP are handled by creating the appropriate number of
3893 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3894 case of SLP. */
437f4a00 3895 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
3896 ncopies = 1;
3897 else
3898 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3899
3900 gcc_assert (ncopies >= 1);
3901
3902 /* FORNOW. This restriction should be relaxed. */
a70d6342 3903 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 3904 {
73fbfcad 3905 if (dump_enabled_p ())
78c60e3d 3906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3907 "multiple types in nested loop.\n");
ebfd146a
IR
3908 return false;
3909 }
3910
a70d6342 3911 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3912 return false;
3913
8644a673 3914 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3915 return false;
3916
3917 /* Is vectorizable store? */
3918
3919 if (!is_gimple_assign (stmt))
3920 return false;
3921
3922 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
3923 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3924 && is_pattern_stmt_p (stmt_info))
3925 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 3926 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 3927 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 3928 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
3929 && TREE_CODE (scalar_dest) != COMPONENT_REF
3930 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
3931 && TREE_CODE (scalar_dest) != REALPART_EXPR
3932 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
3933 return false;
3934
3935 gcc_assert (gimple_assign_single_p (stmt));
3936 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
3937 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3938 &def, &dt))
ebfd146a 3939 {
73fbfcad 3940 if (dump_enabled_p ())
78c60e3d 3941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3942 "use not simple.\n");
ebfd146a
IR
3943 return false;
3944 }
3945
272c6793 3946 elem_type = TREE_TYPE (vectype);
ebfd146a 3947 vec_mode = TYPE_MODE (vectype);
7b7b1813 3948
ebfd146a
IR
3949 /* FORNOW. In some cases we can vectorize even if the data-type is not
3950 supported (e.g. array initialization with 0). */
947131ba 3951 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
3952 return false;
3953
3954 if (!STMT_VINFO_DATA_REF (stmt_info))
3955 return false;
3956
a7ce6ec3
RG
3957 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3958 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3959 size_zero_node) < 0)
a1e53f3f 3960 {
73fbfcad 3961 if (dump_enabled_p ())
78c60e3d 3962 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3963 "negative step for store.\n");
a1e53f3f
L
3964 return false;
3965 }
3966
0d0293ac 3967 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 3968 {
0d0293ac 3969 grouped_store = true;
e14c1050 3970 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
3971 if (!slp && !PURE_SLP_STMT (stmt_info))
3972 {
e14c1050 3973 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
3974 if (vect_store_lanes_supported (vectype, group_size))
3975 store_lanes_p = true;
0d0293ac 3976 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
3977 return false;
3978 }
b8698a0f 3979
ebfd146a
IR
3980 if (first_stmt == stmt)
3981 {
3982 /* STMT is the leader of the group. Check the operands of all the
3983 stmts of the group. */
e14c1050 3984 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
3985 while (next_stmt)
3986 {
3987 gcc_assert (gimple_assign_single_p (next_stmt));
3988 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
3989 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3990 &def_stmt, &def, &dt))
ebfd146a 3991 {
73fbfcad 3992 if (dump_enabled_p ())
78c60e3d 3993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3994 "use not simple.\n");
ebfd146a
IR
3995 return false;
3996 }
e14c1050 3997 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
3998 }
3999 }
4000 }
4001
4002 if (!vec_stmt) /* transformation not required. */
4003 {
4004 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
4005 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4006 NULL, NULL, NULL);
ebfd146a
IR
4007 return true;
4008 }
4009
4010 /** Transform. **/
4011
c716e67f
XDL
4012 ensure_base_align (stmt_info, dr);
4013
0d0293ac 4014 if (grouped_store)
ebfd146a
IR
4015 {
4016 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4017 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 4018
e14c1050 4019 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
4020
4021 /* FORNOW */
a70d6342 4022 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
4023
4024 /* We vectorize all the stmts of the interleaving group when we
4025 reach the last stmt in the group. */
e14c1050
IR
4026 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4027 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
4028 && !slp)
4029 {
4030 *vec_stmt = NULL;
4031 return true;
4032 }
4033
4034 if (slp)
4b5caab7 4035 {
0d0293ac 4036 grouped_store = false;
4b5caab7
IR
4037 /* VEC_NUM is the number of vect stmts to be created for this
4038 group. */
4039 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 4040 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 4041 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 4042 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 4043 }
ebfd146a 4044 else
4b5caab7
IR
4045 /* VEC_NUM is the number of vect stmts to be created for this
4046 group. */
ebfd146a
IR
4047 vec_num = group_size;
4048 }
b8698a0f 4049 else
ebfd146a
IR
4050 {
4051 first_stmt = stmt;
4052 first_dr = dr;
4053 group_size = vec_num = 1;
ebfd146a 4054 }
b8698a0f 4055
73fbfcad 4056 if (dump_enabled_p ())
78c60e3d 4057 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4058 "transform store. ncopies = %d\n", ncopies);
ebfd146a 4059
9771b263
DN
4060 dr_chain.create (group_size);
4061 oprnds.create (group_size);
ebfd146a 4062
720f5239 4063 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4064 gcc_assert (alignment_support_scheme);
272c6793
RS
4065 /* Targets with store-lane instructions must not require explicit
4066 realignment. */
4067 gcc_assert (!store_lanes_p
4068 || alignment_support_scheme == dr_aligned
4069 || alignment_support_scheme == dr_unaligned_supported);
4070
4071 if (store_lanes_p)
4072 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4073 else
4074 aggr_type = vectype;
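 /* E.g., for a group of four V4SF stores the store-lanes path
 operates on one float[16] array value (the access shape of
 NEON vst4-like instructions), whereas the permute fallback
 below works on four separate V4SF vectors. */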
ebfd146a
IR
4075
4076 /* In case the vectorization factor (VF) is bigger than the number
4077 of elements that we can fit in a vectype (nunits), we have to generate
4078 more than one vector stmt - i.e., we need to "unroll" the
b8698a0f 4079 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
4080 vect_get_vec_def_for_copy_stmt. */
4081
0d0293ac 4082 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4083
4084 S1: &base + 2 = x2
4085 S2: &base = x0
4086 S3: &base + 1 = x1
4087 S4: &base + 3 = x3
4088
4089 We create vectorized stores starting from the base address (the access
4090 of the first stmt in the chain, S2 in the above example) when the last
4091 store stmt of the chain (S4) is reached:
4092
4093 VS1: &base = vx2
4094 VS2: &base + vec_size*1 = vx0
4095 VS3: &base + vec_size*2 = vx1
4096 VS4: &base + vec_size*3 = vx3
4097
4098 Then permutation statements are generated:
4099
3fcc1b55
JJ
4100 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4101 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 4102 ...
b8698a0f 4103
ebfd146a
IR
4104 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4105 (the order of the data-refs in the output of vect_permute_store_chain
4106 corresponds to the order of scalar stmts in the interleaving chain - see
4107 the documentation of vect_permute_store_chain()).
4108
4109 In case of both multiple types and interleaving, above vector stores and
ff802fa1 4110 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 4111 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 4112 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
4113 */
4114
4115 prev_stmt_info = NULL;
4116 for (j = 0; j < ncopies; j++)
4117 {
4118 gimple new_stmt;
ebfd146a
IR
4119
4120 if (j == 0)
4121 {
4122 if (slp)
4123 {
4124 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
4125 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4126 NULL, slp_node, -1);
ebfd146a 4127
9771b263 4128 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
4129 }
4130 else
4131 {
b8698a0f
L
4132 /* For interleaved stores we collect vectorized defs for all the
4133 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4134 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
4135 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4136
0d0293ac 4137 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 4138 OPRNDS are of size 1. */
b8698a0f 4139 next_stmt = first_stmt;
ebfd146a
IR
4140 for (i = 0; i < group_size; i++)
4141 {
b8698a0f
L
4142 /* Since gaps are not supported for interleaved stores,
4143 GROUP_SIZE is the exact number of stmts in the chain.
4144 Therefore, NEXT_STMT can't be NULL_TREE. In case
4145 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
4146 iteration of the loop will be executed. */
4147 gcc_assert (next_stmt
4148 && gimple_assign_single_p (next_stmt));
4149 op = gimple_assign_rhs1 (next_stmt);
4150
b8698a0f 4151 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 4152 NULL);
9771b263
DN
4153 dr_chain.quick_push (vec_oprnd);
4154 oprnds.quick_push (vec_oprnd);
e14c1050 4155 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
4156 }
4157 }
4158
4159 /* We should have caught mismatched types earlier. */
4160 gcc_assert (useless_type_conversion_p (vectype,
4161 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
4162 bool simd_lane_access_p
4163 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4164 if (simd_lane_access_p
4165 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4166 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4167 && integer_zerop (DR_OFFSET (first_dr))
4168 && integer_zerop (DR_INIT (first_dr))
4169 && alias_sets_conflict_p (get_alias_set (aggr_type),
4170 get_alias_set (DR_REF (first_dr))))
4171 {
4172 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4173 dataref_offset = build_int_cst (reference_alias_ptr_type
4174 (DR_REF (first_dr)), 0);
8928eff3 4175 inv_p = false;
74bf76ed
JJ
4176 }
4177 else
4178 dataref_ptr
4179 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4180 simd_lane_access_p ? loop : NULL,
4181 NULL_TREE, &dummy, gsi, &ptr_incr,
4182 simd_lane_access_p, &inv_p);
a70d6342 4183 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 4184 }
b8698a0f 4185 else
ebfd146a 4186 {
b8698a0f
L
4187 /* For interleaved stores we created vectorized defs for all the
4188 defs stored in OPRNDS in the previous iteration (previous copy).
4189 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
4190 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4191 next copy.
0d0293ac 4192 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
4193 OPRNDS are of size 1. */
4194 for (i = 0; i < group_size; i++)
4195 {
9771b263 4196 op = oprnds[i];
24ee1384
IR
4197 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4198 &def, &dt);
b8698a0f 4199 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
4200 dr_chain[i] = vec_oprnd;
4201 oprnds[i] = vec_oprnd;
ebfd146a 4202 }
74bf76ed
JJ
4203 if (dataref_offset)
4204 dataref_offset
4205 = int_const_binop (PLUS_EXPR, dataref_offset,
4206 TYPE_SIZE_UNIT (aggr_type));
4207 else
4208 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4209 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
4210 }
4211
272c6793 4212 if (store_lanes_p)
ebfd146a 4213 {
272c6793 4214 tree vec_array;
267d3070 4215
272c6793
RS
4216 /* Combine all the vectors into an array. */
4217 vec_array = create_vector_array (vectype, vec_num);
4218 for (i = 0; i < vec_num; i++)
c2d7ab2a 4219 {
9771b263 4220 vec_oprnd = dr_chain[i];
272c6793 4221 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 4222 }
b8698a0f 4223
272c6793
RS
4224 /* Emit:
4225 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4226 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4227 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4228 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 4229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4230 }
4231 else
4232 {
4233 new_stmt = NULL;
0d0293ac 4234 if (grouped_store)
272c6793 4235 {
b6b9227d
JJ
4236 if (j == 0)
4237 result_chain.create (group_size);
272c6793
RS
4238 /* Permute. */
4239 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4240 &result_chain);
4241 }
c2d7ab2a 4242
272c6793
RS
4243 next_stmt = first_stmt;
4244 for (i = 0; i < vec_num; i++)
4245 {
644ffefd 4246 unsigned align, misalign;
272c6793
RS
4247
4248 if (i > 0)
4249 /* Bump the vector pointer. */
4250 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4251 stmt, NULL_TREE);
4252
4253 if (slp)
9771b263 4254 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
4255 else if (grouped_store)
4256 /* For grouped stores vectorized defs are interleaved in
272c6793 4257 vect_permute_store_chain(). */
9771b263 4258 vec_oprnd = result_chain[i];
272c6793
RS
4259
4260 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
74bf76ed
JJ
4261 dataref_offset
4262 ? dataref_offset
4263 : build_int_cst (reference_alias_ptr_type
4264 (DR_REF (first_dr)), 0));
644ffefd 4265 align = TYPE_ALIGN_UNIT (vectype);
272c6793 4266 if (aligned_access_p (first_dr))
644ffefd 4267 misalign = 0;
272c6793
RS
4268 else if (DR_MISALIGNMENT (first_dr) == -1)
4269 {
4270 TREE_TYPE (data_ref)
4271 = build_aligned_type (TREE_TYPE (data_ref),
4272 TYPE_ALIGN (elem_type));
644ffefd
MJ
4273 align = TYPE_ALIGN_UNIT (elem_type);
4274 misalign = 0;
272c6793
RS
4275 }
4276 else
4277 {
4278 TREE_TYPE (data_ref)
4279 = build_aligned_type (TREE_TYPE (data_ref),
4280 TYPE_ALIGN (elem_type));
644ffefd 4281 misalign = DR_MISALIGNMENT (first_dr);
272c6793 4282 }
74bf76ed
JJ
4283 if (dataref_offset == NULL_TREE)
4284 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4285 misalign);
c2d7ab2a 4286
272c6793
RS
4287 /* Arguments are ready. Create the new vector stmt. */
4288 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4289 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4290
4291 if (slp)
4292 continue;
4293
e14c1050 4294 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
4295 if (!next_stmt)
4296 break;
4297 }
ebfd146a 4298 }
1da0876c
RS
4299 if (!slp)
4300 {
4301 if (j == 0)
4302 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4303 else
4304 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4305 prev_stmt_info = vinfo_for_stmt (new_stmt);
4306 }
ebfd146a
IR
4307 }
4308
9771b263
DN
4309 dr_chain.release ();
4310 oprnds.release ();
4311 result_chain.release ();
4312 vec_oprnds.release ();
ebfd146a
IR
4313
4314 return true;
4315}
4316
aec7ae7d
JJ
4317/* Given a vector type VECTYPE and permutation SEL returns
4318 the VECTOR_CST mask that implements the permutation of the
4319 vector elements. If that is impossible to do, returns NULL. */
a1e53f3f 4320
3fcc1b55
JJ
4321tree
4322vect_gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 4323{
d2a12ae7 4324 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 4325 int i, nunits;
a1e53f3f 4326
22e4dee7 4327 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7
RH
4328
4329 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
a1e53f3f
L
4330 return NULL;
4331
96f9265a
RG
4332 mask_elt_type = lang_hooks.types.type_for_mode
4333 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 4334 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 4335
d2a12ae7 4336 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 4337 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
4338 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4339 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 4340
2635892a 4341 return mask_vec;
a1e53f3f
L
4342}
4343
aec7ae7d
JJ
4344/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4345 reversal of the vector elements. If that is impossible to do,
4346 returns NULL. */
4347
4348static tree
4349perm_mask_for_reverse (tree vectype)
4350{
4351 int i, nunits;
4352 unsigned char *sel;
4353
4354 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4355 sel = XALLOCAVEC (unsigned char, nunits);
4356
4357 for (i = 0; i < nunits; ++i)
4358 sel[i] = nunits - 1 - i;
4359
3fcc1b55 4360 return vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4361}
4362
4363/* Given a vector variable X and Y, that was generated for the scalar
4364 STMT, generate instructions to permute the vector elements of X and Y
4365 using permutation mask MASK_VEC, insert them at *GSI and return the
4366 permuted vector variable. */
a1e53f3f
L
4367
4368static tree
aec7ae7d
JJ
4369permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4370 gimple_stmt_iterator *gsi)
a1e53f3f
L
4371{
4372 tree vectype = TREE_TYPE (x);
aec7ae7d 4373 tree perm_dest, data_ref;
a1e53f3f
L
4374 gimple perm_stmt;
4375
a1e53f3f 4376 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
aec7ae7d 4377 data_ref = make_ssa_name (perm_dest, NULL);
a1e53f3f
L
4378
4379 /* Generate the permute statement. */
73804b12
RG
4380 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4381 x, y, mask_vec);
a1e53f3f
L
4382 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4383
4384 return data_ref;
4385}
4386
ebfd146a
IR
4387/* vectorizable_load.
4388
b8698a0f
L
4389 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4390 can be vectorized.
4391 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4392 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4393 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4394
4395static bool
4396vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 4397 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
4398{
4399 tree scalar_dest;
4400 tree vec_dest = NULL;
4401 tree data_ref = NULL;
4402 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4403 stmt_vec_info prev_stmt_info;
ebfd146a 4404 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4405 struct loop *loop = NULL;
ebfd146a 4406 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4407 bool nested_in_vect_loop = false;
c716e67f 4408 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 4409 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4410 tree elem_type;
ebfd146a 4411 tree new_temp;
947131ba 4412 enum machine_mode mode;
ebfd146a
IR
4413 gimple new_stmt = NULL;
4414 tree dummy;
4415 enum dr_alignment_support alignment_support_scheme;
4416 tree dataref_ptr = NULL_TREE;
74bf76ed 4417 tree dataref_offset = NULL_TREE;
fef4d2b3 4418 gimple ptr_incr = NULL;
ebfd146a
IR
4419 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4420 int ncopies;
a64b9c26 4421 int i, j, group_size, group_gap;
ebfd146a
IR
4422 tree msq = NULL_TREE, lsq;
4423 tree offset = NULL_TREE;
4424 tree realignment_token = NULL_TREE;
4425 gimple phi = NULL;
6e1aa848 4426 vec<tree> dr_chain = vNULL;
0d0293ac 4427 bool grouped_load = false;
272c6793 4428 bool load_lanes_p = false;
ebfd146a 4429 gimple first_stmt;
ebfd146a 4430 bool inv_p;
319e6439 4431 bool negative = false;
ebfd146a
IR
4432 bool compute_in_loop = false;
4433 struct loop *at_loop;
4434 int vec_num;
4435 bool slp = (slp_node != NULL);
4436 bool slp_perm = false;
4437 enum tree_code code;
a70d6342
IR
4438 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4439 int vf;
272c6793 4440 tree aggr_type;
aec7ae7d
JJ
4441 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4442 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4443 int gather_scale = 1;
4444 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
4445
4446 if (loop_vinfo)
4447 {
4448 loop = LOOP_VINFO_LOOP (loop_vinfo);
4449 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4450 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4451 }
4452 else
3533e503 4453 vf = 1;
ebfd146a
IR
4454
4455 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4456 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4457 case of SLP. */
437f4a00 4458 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4459 ncopies = 1;
4460 else
4461 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4462
4463 gcc_assert (ncopies >= 1);
4464
4465 /* FORNOW. This restriction should be relaxed. */
4466 if (nested_in_vect_loop && ncopies > 1)
4467 {
73fbfcad 4468 if (dump_enabled_p ())
78c60e3d 4469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4470 "multiple types in nested loop.\n");
ebfd146a
IR
4471 return false;
4472 }
4473
a70d6342 4474 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4475 return false;
4476
8644a673 4477 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4478 return false;
4479
4480 /* Is vectorizable load? */
4481 if (!is_gimple_assign (stmt))
4482 return false;
4483
4484 scalar_dest = gimple_assign_lhs (stmt);
4485 if (TREE_CODE (scalar_dest) != SSA_NAME)
4486 return false;
4487
4488 code = gimple_assign_rhs_code (stmt);
4489 if (code != ARRAY_REF
38000232 4490 && code != BIT_FIELD_REF
ebfd146a 4491 && code != INDIRECT_REF
e9dbe7bb
IR
4492 && code != COMPONENT_REF
4493 && code != IMAGPART_EXPR
70f34814 4494 && code != REALPART_EXPR
42373e0b
RG
4495 && code != MEM_REF
4496 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
4497 return false;
4498
4499 if (!STMT_VINFO_DATA_REF (stmt_info))
4500 return false;
4501
7b7b1813 4502 elem_type = TREE_TYPE (vectype);
947131ba 4503 mode = TYPE_MODE (vectype);
ebfd146a
IR
4504
4505 /* FORNOW. In some cases we can vectorize even if the data-type is not
4506 supported (e.g. data copies). */
947131ba 4507 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 4508 {
73fbfcad 4509 if (dump_enabled_p ())
78c60e3d 4510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4511 "Aligned load, but unsupported type.\n");
ebfd146a
IR
4512 return false;
4513 }
4514
ebfd146a 4515 /* Check if the load is part of an interleaving chain. */
0d0293ac 4516 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 4517 {
0d0293ac 4518 grouped_load = true;
ebfd146a 4519 /* FORNOW */
aec7ae7d 4520 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 4521
e14c1050 4522 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
4523 if (!slp && !PURE_SLP_STMT (stmt_info))
4524 {
e14c1050 4525 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
4526 if (vect_load_lanes_supported (vectype, group_size))
4527 load_lanes_p = true;
0d0293ac 4528 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
4529 return false;
4530 }
ebfd146a
IR
4531 }
4532
a1e53f3f 4533
aec7ae7d
JJ
4534 if (STMT_VINFO_GATHER_P (stmt_info))
4535 {
4536 gimple def_stmt;
4537 tree def;
4538 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4539 &gather_off, &gather_scale);
4540 gcc_assert (gather_decl);
24ee1384 4541 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
4542 &def_stmt, &def, &gather_dt,
4543 &gather_off_vectype))
4544 {
73fbfcad 4545 if (dump_enabled_p ())
78c60e3d 4546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4547 "gather index use not simple.\n");
aec7ae7d
JJ
4548 return false;
4549 }
4550 }
7d75abc8 4551 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
14ac6aa2 4552 ;
319e6439
RG
4553 else
4554 {
4555 negative = tree_int_cst_compare (nested_in_vect_loop
4556 ? STMT_VINFO_DR_STEP (stmt_info)
4557 : DR_STEP (dr),
4558 size_zero_node) < 0;
4559 if (negative && ncopies > 1)
4560 {
73fbfcad 4561 if (dump_enabled_p ())
78c60e3d 4562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4563 "multiple types with negative step.\n");
319e6439
RG
4564 return false;
4565 }
4566
4567 if (negative)
4568 {
08940f33
RB
4569 if (grouped_load)
4570 {
4571 if (dump_enabled_p ())
4572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4573 "negative step for group load not supported"
4574 "\n");
08940f33
RB
4575 return false;
4576 }
319e6439
RG
4577 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4578 if (alignment_support_scheme != dr_aligned
4579 && alignment_support_scheme != dr_unaligned_supported)
4580 {
73fbfcad 4581 if (dump_enabled_p ())
78c60e3d 4582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4583 "negative step but alignment required.\n");
319e6439
RG
4584 return false;
4585 }
4586 if (!perm_mask_for_reverse (vectype))
4587 {
73fbfcad 4588 if (dump_enabled_p ())
78c60e3d 4589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4590 "negative step and reversing not supported."
4591 "\n");
319e6439
RG
4592 return false;
4593 }
4594 }
7d75abc8 4595 }
aec7ae7d 4596
ebfd146a
IR
4597 if (!vec_stmt) /* transformation not required. */
4598 {
4599 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 4600 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
ebfd146a
IR
4601 return true;
4602 }
4603
73fbfcad 4604 if (dump_enabled_p ())
78c60e3d 4605 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4606 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
4607
4608 /** Transform. **/
4609
c716e67f
XDL
4610 ensure_base_align (stmt_info, dr);
4611
aec7ae7d
JJ
4612 if (STMT_VINFO_GATHER_P (stmt_info))
4613 {
4614 tree vec_oprnd0 = NULL_TREE, op;
4615 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4616 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4617 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4618 edge pe = loop_preheader_edge (loop);
4619 gimple_seq seq;
4620 basic_block new_bb;
4621 enum { NARROW, NONE, WIDEN } modifier;
4622 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4623
4624 if (nunits == gather_off_nunits)
4625 modifier = NONE;
4626 else if (nunits == gather_off_nunits / 2)
4627 {
4628 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4629 modifier = WIDEN;
4630
4631 for (i = 0; i < gather_off_nunits; ++i)
4632 sel[i] = i | nunits;
4633
3fcc1b55 4634 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
aec7ae7d
JJ
4635 gcc_assert (perm_mask != NULL_TREE);
4636 }
4637 else if (nunits == gather_off_nunits * 2)
4638 {
4639 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4640 modifier = NARROW;
4641
4642 for (i = 0; i < nunits; ++i)
4643 sel[i] = i < gather_off_nunits
4644 ? i : i + nunits - gather_off_nunits;
4645
3fcc1b55 4646 perm_mask = vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4647 gcc_assert (perm_mask != NULL_TREE);
4648 ncopies *= 2;
4649 }
4650 else
4651 gcc_unreachable ();
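 /* E.g., for a V2DF load with V4SI offsets (the WIDEN case) the
 loop above built SEL = {2, 3, 2, 3}, shuffling the upper half
 of the offset vector down for every odd-numbered copy; in the
 NARROW case SEL concatenates the low halves of two successive
 gather results. */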
4652
4653 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4654 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4655 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4656 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4657 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4658 scaletype = TREE_VALUE (arglist);
4659 gcc_checking_assert (types_compatible_p (srctype, rettype)
4660 && types_compatible_p (srctype, masktype));
4661
4662 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4663
4664 ptr = fold_convert (ptrtype, gather_base);
4665 if (!is_gimple_min_invariant (ptr))
4666 {
4667 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4668 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4669 gcc_assert (!new_bb);
4670 }
4671
4672 /* Currently we support only unconditional gather loads,
4673 so mask should be all ones. */
4674 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4675 mask = build_int_cst (TREE_TYPE (masktype), -1);
4676 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4677 {
4678 REAL_VALUE_TYPE r;
4679 long tmp[6];
4680 for (j = 0; j < 6; ++j)
4681 tmp[j] = -1;
4682 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4683 mask = build_real (TREE_TYPE (masktype), r);
4684 }
4685 else
4686 gcc_unreachable ();
4687 mask = build_vector_from_val (masktype, mask);
4688 mask = vect_init_vector (stmt, mask, masktype, NULL);
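 /* E.g., for a floating-point MASKTYPE the all-ones lanes are
 assembled bit-wise via real_from_target above; an all-ones
 vector is what gather builtins (such as x86's AVX2 gathers)
 take to mean "every lane active". */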
4689
4690 scale = build_int_cst (scaletype, gather_scale);
4691
4692 prev_stmt_info = NULL;
4693 for (j = 0; j < ncopies; ++j)
4694 {
4695 if (modifier == WIDEN && (j & 1))
4696 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4697 perm_mask, stmt, gsi);
4698 else if (j == 0)
4699 op = vec_oprnd0
4700 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4701 else
4702 op = vec_oprnd0
4703 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4704
4705 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4706 {
4707 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4708 == TYPE_VECTOR_SUBPARTS (idxtype));
4709 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
4710 var = make_ssa_name (var, NULL);
4711 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4712 new_stmt
4713 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4714 op, NULL_TREE);
4715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4716 op = var;
4717 }
4718
4719 new_stmt
4720 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4721
4722 if (!useless_type_conversion_p (vectype, rettype))
4723 {
4724 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4725 == TYPE_VECTOR_SUBPARTS (rettype));
4726 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
4727 op = make_ssa_name (var, new_stmt);
4728 gimple_call_set_lhs (new_stmt, op);
4729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4730 var = make_ssa_name (vec_dest, NULL);
4731 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4732 new_stmt
4733 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4734 NULL_TREE);
4735 }
4736 else
4737 {
4738 var = make_ssa_name (vec_dest, new_stmt);
4739 gimple_call_set_lhs (new_stmt, var);
4740 }
4741
4742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4743
4744 if (modifier == NARROW)
4745 {
4746 if ((j & 1) == 0)
4747 {
4748 prev_res = var;
4749 continue;
4750 }
4751 var = permute_vec_elements (prev_res, var,
4752 perm_mask, stmt, gsi);
4753 new_stmt = SSA_NAME_DEF_STMT (var);
4754 }
4755
4756 if (prev_stmt_info == NULL)
4757 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4758 else
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4760 prev_stmt_info = vinfo_for_stmt (new_stmt);
4761 }
4762 return true;
4763 }
7d75abc8
MM
4764 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4765 {
4766 gimple_stmt_iterator incr_gsi;
4767 bool insert_after;
4768 gimple incr;
4769 tree offvar;
7d75abc8
MM
4770 tree ivstep;
4771 tree running_off;
9771b263 4772 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 4773 gimple_seq stmts = NULL;
14ac6aa2
RB
4774 tree stride_base, stride_step, alias_off;
4775
4776 gcc_assert (!nested_in_vect_loop);
7d75abc8 4777
14ac6aa2
RB
4778 stride_base
4779 = fold_build_pointer_plus
4780 (unshare_expr (DR_BASE_ADDRESS (dr)),
4781 size_binop (PLUS_EXPR,
4782 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 4783 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 4784 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
4785
4786 /* For a load with loop-invariant (but other than power-of-2)
4787 stride (i.e. not a grouped access) like so:
4788
4789 for (i = 0; i < n; i += stride)
4790 ... = array[i];
4791
4792 we generate a new induction variable and new accesses to
4793 form a new vector (or vectors, depending on ncopies):
4794
4795 for (j = 0; ; j += VF*stride)
4796 tmp1 = array[j];
4797 tmp2 = array[j + stride];
4798 ...
4799 vectemp = {tmp1, tmp2, ...}
4800 */
4801
4802 ivstep = stride_step;
4803 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4804 build_int_cst (TREE_TYPE (ivstep), vf));
4805
4806 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4807
4808 create_iv (stride_base, ivstep, NULL,
4809 loop, &incr_gsi, insert_after,
4810 &offvar, NULL);
4811 incr = gsi_stmt (incr_gsi);
4812 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4813
4814 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4815 if (stmts)
4816 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4817
4818 prev_stmt_info = NULL;
4819 running_off = offvar;
14ac6aa2 4820 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4821 for (j = 0; j < ncopies; j++)
4822 {
4823 tree vec_inv;
4824
9771b263 4825 vec_alloc (v, nunits);
4826 for (i = 0; i < nunits; i++)
4827 {
4828 tree newref, newoff;
4829 gimple incr;
4830 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4831 running_off, alias_off);
4832
4833 newref = force_gimple_operand_gsi (gsi, newref, true,
4834 NULL_TREE, true,
4835 GSI_SAME_STMT);
4836 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 4837 newoff = copy_ssa_name (running_off, NULL);
4838 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4839 running_off, stride_step);
4840 vect_finish_stmt_generation (stmt, incr, gsi);
4841
4842 running_off = newoff;
4843 }
4844
4845 vec_inv = build_constructor (vectype, v);
4846 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4847 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4848
4849 if (j == 0)
4850 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4851 else
4852 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4853 prev_stmt_info = vinfo_for_stmt (new_stmt);
4854 }
4855 return true;
4856 }
aec7ae7d 4857
0d0293ac 4858 if (grouped_load)
ebfd146a 4859 {
e14c1050 4860 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4861 if (slp
01d8bf07 4862 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4863 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4864 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4865
ebfd146a 4866 /* Check if the chain of loads is already vectorized. */
4867 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4868 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4869 ??? But we can only do so if there is exactly one
4870 as we have no way to get at the rest. Leave the CSE
4871 opportunity alone.
4872 ??? With the group load eventually participating
4873 in multiple different permutations (having multiple
4874 slp nodes which refer to the same group) the CSE
 4875 would even produce wrong code. See PR56270. */
4876 && !slp)
4877 {
4878 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4879 return true;
4880 }
4881 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4882 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4883
4884 /* VEC_NUM is the number of vect stmts to be created for this group. */
4885 if (slp)
4886 {
0d0293ac 4887 grouped_load = false;
ebfd146a 4888 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 4889 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 4890 slp_perm = true;
a64b9c26 4891 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 4892 }
ebfd146a 4893 else
4894 {
4895 vec_num = group_size;
4896 group_gap = 0;
4897 }
4898 }
4899 else
4900 {
4901 first_stmt = stmt;
4902 first_dr = dr;
4903 group_size = vec_num = 1;
a64b9c26 4904 group_gap = 0;
4905 }
4906
720f5239 4907 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4908 gcc_assert (alignment_support_scheme);
4909 /* Targets with load-lane instructions must not require explicit
4910 realignment. */
4911 gcc_assert (!load_lanes_p
4912 || alignment_support_scheme == dr_aligned
4913 || alignment_support_scheme == dr_unaligned_supported);
4914
4915 /* In case the vectorization factor (VF) is bigger than the number
4916 of elements that we can fit in a vectype (nunits), we have to generate
 4917 more than one vector stmt - i.e., we need to "unroll" the
ff802fa1 4918 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4919 from one copy of the vector stmt to the next, in the field
ff802fa1 4920 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4921 stages to find the correct vector defs to be used when vectorizing
4922 stmts that use the defs of the current stmt. The example below
4923 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4924 need to create 4 vectorized stmts):
4925
4926 before vectorization:
4927 RELATED_STMT VEC_STMT
4928 S1: x = memref - -
4929 S2: z = x + 1 - -
4930
4931 step 1: vectorize stmt S1:
4932 We first create the vector stmt VS1_0, and, as usual, record a
4933 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4934 Next, we create the vector stmt VS1_1, and record a pointer to
4935 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4936 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4937 stmts and pointers:
4938 RELATED_STMT VEC_STMT
4939 VS1_0: vx0 = memref0 VS1_1 -
4940 VS1_1: vx1 = memref1 VS1_2 -
4941 VS1_2: vx2 = memref2 VS1_3 -
4942 VS1_3: vx3 = memref3 - -
4943 S1: x = load - VS1_0
4944 S2: z = x + 1 - -
4945
 4946 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4947 information we recorded in RELATED_STMT field is used to vectorize
4948 stmt S2. */
4949
0d0293ac 4950 /* In case of interleaving (non-unit grouped access):
4951
4952 S1: x2 = &base + 2
4953 S2: x0 = &base
4954 S3: x1 = &base + 1
4955 S4: x3 = &base + 3
4956
b8698a0f 4957 Vectorized loads are created in the order of memory accesses
4958 starting from the access of the first stmt of the chain:
4959
4960 VS1: vx0 = &base
4961 VS2: vx1 = &base + vec_size*1
4962 VS3: vx3 = &base + vec_size*2
4963 VS4: vx4 = &base + vec_size*3
4964
4965 Then permutation statements are generated:
4966
4967 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4968 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4969 ...
4970
4971 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4972 (the order of the data-refs in the output of vect_permute_load_chain
4973 corresponds to the order of scalar stmts in the interleaving chain - see
4974 the documentation of vect_permute_load_chain()).
4975 The generation of permutation stmts and recording them in
0d0293ac 4976 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4977
b8698a0f 4978 In case of both multiple types and interleaving, the vector loads and
4979 permutation stmts above are created for every copy. The result vector
4980 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4981 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4982
4983 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4984 on a target that supports unaligned accesses (dr_unaligned_supported)
4985 we generate the following code:
4986 p = initial_addr;
4987 indx = 0;
4988 loop {
4989 p = p + indx * vectype_size;
4990 vec_dest = *(p);
4991 indx = indx + 1;
4992 }
4993
4994 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4995 does not support unaligned accesses (dr_explicit_realign_optimized) -
4996 then generate the following code, in which the data in each iteration is
4997 obtained by two vector loads, one from the previous iteration, and one
4998 from the current iteration:
4999 p1 = initial_addr;
5000 msq_init = *(floor(p1))
5001 p2 = initial_addr + VS - 1;
5002 realignment_token = call target_builtin;
5003 indx = 0;
5004 loop {
5005 p2 = p2 + indx * vectype_size
5006 lsq = *(floor(p2))
5007 vec_dest = realign_load (msq, lsq, realignment_token)
5008 indx = indx + 1;
5009 msq = lsq;
5010 } */
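  /* A concrete sketch of the floor() operation used above: with 16-byte
     vectors, floor(p) is realized below by masking off the low address
     bits (a BIT_AND_EXPR with -16), and the REALIGN_LOAD combines the
     two aligned loads straddling the unaligned address with the help of
     the realignment token.  */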
5011
5012 /* If the misalignment remains the same throughout the execution of the
5013 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 5014 preheader. Otherwise, it needs to be created inside the loop.
5015 This can only occur when vectorizing memory accesses in the inner-loop
5016 nested within an outer-loop that is being vectorized. */
5017
d1e4b493 5018 if (nested_in_vect_loop
211bea38 5019 && (TREE_INT_CST_LOW (DR_STEP (dr))
5020 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5021 {
5022 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5023 compute_in_loop = true;
5024 }
5025
5026 if ((alignment_support_scheme == dr_explicit_realign_optimized
5027 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 5028 && !compute_in_loop)
5029 {
5030 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5031 alignment_support_scheme, NULL_TREE,
5032 &at_loop);
5033 if (alignment_support_scheme == dr_explicit_realign_optimized)
5034 {
5035 phi = SSA_NAME_DEF_STMT (msq);
5036 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5037 }
5038 }
5039 else
5040 at_loop = loop;
5041
5042 if (negative)
5043 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5044
5045 if (load_lanes_p)
5046 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5047 else
5048 aggr_type = vectype;
5049
5050 prev_stmt_info = NULL;
5051 for (j = 0; j < ncopies; j++)
b8698a0f 5052 {
272c6793 5053 /* 1. Create the vector or array pointer update chain. */
ebfd146a 5054 if (j == 0)
5055 {
5056 bool simd_lane_access_p
5057 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5058 if (simd_lane_access_p
5059 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5060 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5061 && integer_zerop (DR_OFFSET (first_dr))
5062 && integer_zerop (DR_INIT (first_dr))
5063 && alias_sets_conflict_p (get_alias_set (aggr_type),
5064 get_alias_set (DR_REF (first_dr)))
5065 && (alignment_support_scheme == dr_aligned
5066 || alignment_support_scheme == dr_unaligned_supported))
5067 {
5068 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5069 dataref_offset = build_int_cst (reference_alias_ptr_type
5070 (DR_REF (first_dr)), 0);
8928eff3 5071 inv_p = false;
5072 }
5073 else
5074 dataref_ptr
5075 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5076 offset, &dummy, gsi, &ptr_incr,
5077 simd_lane_access_p, &inv_p);
5078 }
5079 else if (dataref_offset)
5080 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5081 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5082 else
5083 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5084 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5085
0d0293ac 5086 if (grouped_load || slp_perm)
9771b263 5087 dr_chain.create (vec_num);
5ce1ee7f 5088
272c6793 5089 if (load_lanes_p)
ebfd146a 5090 {
5091 tree vec_array;
5092
5093 vec_array = create_vector_array (vectype, vec_num);
5094
5095 /* Emit:
5096 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5097 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5098 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5099 gimple_call_set_lhs (new_stmt, vec_array);
5100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 5101
5102 /* Extract each vector into an SSA_NAME. */
5103 for (i = 0; i < vec_num; i++)
ebfd146a 5104 {
5105 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5106 vec_array, i);
9771b263 5107 dr_chain.quick_push (new_temp);
5108 }
5109
5110 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 5111 vect_record_grouped_load_vectors (stmt, dr_chain);
5112 }
5113 else
5114 {
5115 for (i = 0; i < vec_num; i++)
5116 {
5117 if (i > 0)
5118 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5119 stmt, NULL_TREE);
5120
5121 /* 2. Create the vector-load in the loop. */
5122 switch (alignment_support_scheme)
5123 {
5124 case dr_aligned:
5125 case dr_unaligned_supported:
be1ac4ec 5126 {
5127 unsigned int align, misalign;
5128
5129 data_ref
5130 = build2 (MEM_REF, vectype, dataref_ptr,
5131 dataref_offset
5132 ? dataref_offset
5133 : build_int_cst (reference_alias_ptr_type
5134 (DR_REF (first_dr)), 0));
644ffefd 5135 align = TYPE_ALIGN_UNIT (vectype);
5136 if (alignment_support_scheme == dr_aligned)
5137 {
5138 gcc_assert (aligned_access_p (first_dr));
644ffefd 5139 misalign = 0;
5140 }
5141 else if (DR_MISALIGNMENT (first_dr) == -1)
5142 {
5143 TREE_TYPE (data_ref)
5144 = build_aligned_type (TREE_TYPE (data_ref),
5145 TYPE_ALIGN (elem_type));
5146 align = TYPE_ALIGN_UNIT (elem_type);
5147 misalign = 0;
5148 }
5149 else
5150 {
5151 TREE_TYPE (data_ref)
5152 = build_aligned_type (TREE_TYPE (data_ref),
5153 TYPE_ALIGN (elem_type));
644ffefd 5154 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5155 }
5156 if (dataref_offset == NULL_TREE)
5157 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5158 align, misalign);
272c6793 5159 break;
be1ac4ec 5160 }
272c6793 5161 case dr_explicit_realign:
267d3070 5162 {
5163 tree ptr, bump;
5164 tree vs_minus_1;
5165
5166 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5167
5168 if (compute_in_loop)
5169 msq = vect_setup_realignment (first_stmt, gsi,
5170 &realignment_token,
5171 dr_explicit_realign,
5172 dataref_ptr, NULL);
5173
070ecdfd 5174 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5175 new_stmt = gimple_build_assign_with_ops
070ecdfd 5176 (BIT_AND_EXPR, ptr, dataref_ptr,
5177 build_int_cst
5178 (TREE_TYPE (dataref_ptr),
5179 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5181 data_ref
5182 = build2 (MEM_REF, vectype, ptr,
5183 build_int_cst (reference_alias_ptr_type
5184 (DR_REF (first_dr)), 0));
5185 vec_dest = vect_create_destination_var (scalar_dest,
5186 vectype);
5187 new_stmt = gimple_build_assign (vec_dest, data_ref);
5188 new_temp = make_ssa_name (vec_dest, new_stmt);
5189 gimple_assign_set_lhs (new_stmt, new_temp);
5190 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5191 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5193 msq = new_temp;
5194
5195 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5196 TYPE_SIZE_UNIT (elem_type));
5197 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5198 new_stmt = gimple_build_assign_with_ops
5199 (BIT_AND_EXPR, NULL_TREE, ptr,
5200 build_int_cst
5201 (TREE_TYPE (ptr),
5202 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5203 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5204 gimple_assign_set_lhs (new_stmt, ptr);
5205 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5206 data_ref
5207 = build2 (MEM_REF, vectype, ptr,
5208 build_int_cst (reference_alias_ptr_type
5209 (DR_REF (first_dr)), 0));
5210 break;
267d3070 5211 }
272c6793 5212 case dr_explicit_realign_optimized:
070ecdfd 5213 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5214 new_stmt = gimple_build_assign_with_ops
070ecdfd 5215 (BIT_AND_EXPR, new_temp, dataref_ptr,
5216 build_int_cst
5217 (TREE_TYPE (dataref_ptr),
5218 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5219 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5220 data_ref
5221 = build2 (MEM_REF, vectype, new_temp,
5222 build_int_cst (reference_alias_ptr_type
5223 (DR_REF (first_dr)), 0));
5224 break;
5225 default:
5226 gcc_unreachable ();
5227 }
ebfd146a 5228 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5229 new_stmt = gimple_build_assign (vec_dest, data_ref);
5230 new_temp = make_ssa_name (vec_dest, new_stmt);
5231 gimple_assign_set_lhs (new_stmt, new_temp);
5232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5233
5234 /* 3. Handle explicit realignment if necessary/supported.
5235 Create in loop:
5236 vec_dest = realign_load (msq, lsq, realignment_token) */
5237 if (alignment_support_scheme == dr_explicit_realign_optimized
5238 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5239 {
5240 lsq = gimple_assign_lhs (new_stmt);
5241 if (!realignment_token)
5242 realignment_token = dataref_ptr;
5243 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5244 new_stmt
5245 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5246 vec_dest, msq, lsq,
5247 realignment_token);
5248 new_temp = make_ssa_name (vec_dest, new_stmt);
5249 gimple_assign_set_lhs (new_stmt, new_temp);
5250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5251
5252 if (alignment_support_scheme == dr_explicit_realign_optimized)
5253 {
5254 gcc_assert (phi);
5255 if (i == vec_num - 1 && j == ncopies - 1)
5256 add_phi_arg (phi, lsq,
5257 loop_latch_edge (containing_loop),
9e227d60 5258 UNKNOWN_LOCATION);
5259 msq = lsq;
5260 }
ebfd146a 5261 }
ebfd146a 5262
5263 /* 4. Handle invariant-load. */
5264 if (inv_p && !bb_vinfo)
5265 {
5266 gimple_stmt_iterator gsi2 = *gsi;
5267 gcc_assert (!grouped_load);
5268 gsi_next (&gsi2);
5269 new_temp = vect_init_vector (stmt, scalar_dest,
5270 vectype, &gsi2);
5271 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5272 }
5273
5274 if (negative)
5275 {
5276 tree perm_mask = perm_mask_for_reverse (vectype);
5277 new_temp = permute_vec_elements (new_temp, new_temp,
5278 perm_mask, stmt, gsi);
5279 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5280 }
267d3070 5281
272c6793 5282 /* Collect vector loads and later create their permutation in
5283 vect_transform_grouped_load (). */
5284 if (grouped_load || slp_perm)
9771b263 5285 dr_chain.quick_push (new_temp);
267d3070 5286
5287 /* Store vector loads in the corresponding SLP_NODE. */
5288 if (slp && !slp_perm)
9771b263 5289 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5290 }
5291 /* Bump the vector pointer to account for a gap. */
5292 if (slp && group_gap != 0)
5293 {
5294 tree bump = size_binop (MULT_EXPR,
5295 TYPE_SIZE_UNIT (elem_type),
5296 size_int (group_gap));
5297 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5298 stmt, bump);
5299 }
5300 }
5301
5302 if (slp && !slp_perm)
5303 continue;
5304
5305 if (slp_perm)
5306 {
01d8bf07 5307 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5308 slp_node_instance, false))
5309 {
9771b263 5310 dr_chain.release ();
5311 return false;
5312 }
5313 }
5314 else
5315 {
0d0293ac 5316 if (grouped_load)
ebfd146a 5317 {
272c6793 5318 if (!load_lanes_p)
0d0293ac 5319 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5320 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5321 }
5322 else
5323 {
5324 if (j == 0)
5325 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5326 else
5327 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5328 prev_stmt_info = vinfo_for_stmt (new_stmt);
5329 }
5330 }
9771b263 5331 dr_chain.release ();
5332 }
5333
5334 return true;
5335}
5336
5337/* Function vect_is_simple_cond.
b8698a0f 5338
5339 Input:
5340 LOOP - the loop that is being vectorized.
5341 COND - Condition that is checked for simple use.
5342
5343 Output:
5344 *COMP_VECTYPE - the vector type for the comparison.
5345
5346 Returns whether a COND can be vectorized. Checks whether
 5347 condition operands are supportable using vect_is_simple_use. */
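/* For example (a sketch): for a COND like a_1 < b_2 both operands are
   checked with vect_is_simple_use_1, and *COMP_VECTYPE is taken from
   whichever operand has a vector type recorded; a constant operand, as
   in a_1 < 0, simply leaves its side's vectype NULL.  */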
5348
87aab9b2 5349static bool
5350vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5351 bb_vec_info bb_vinfo, tree *comp_vectype)
5352{
5353 tree lhs, rhs;
5354 tree def;
5355 enum vect_def_type dt;
e9e1d143 5356 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5357
5358 if (!COMPARISON_CLASS_P (cond))
5359 return false;
5360
5361 lhs = TREE_OPERAND (cond, 0);
5362 rhs = TREE_OPERAND (cond, 1);
5363
5364 if (TREE_CODE (lhs) == SSA_NAME)
5365 {
5366 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5367 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5368 &lhs_def_stmt, &def, &dt, &vectype1))
5369 return false;
5370 }
5371 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5372 && TREE_CODE (lhs) != FIXED_CST)
5373 return false;
5374
5375 if (TREE_CODE (rhs) == SSA_NAME)
5376 {
5377 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5378 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5379 &rhs_def_stmt, &def, &dt, &vectype2))
5380 return false;
5381 }
f7e531cf 5382 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5383 && TREE_CODE (rhs) != FIXED_CST)
5384 return false;
5385
e9e1d143 5386 *comp_vectype = vectype1 ? vectype1 : vectype2;
5387 return true;
5388}
5389
5390/* vectorizable_condition.
5391
 5392 Check if STMT is a conditional modify expression that can be vectorized.
5393 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5394 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5395 at GSI.
5396
 5397 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 5398 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in the
 5399 else clause if it is 2).
5400
5401 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
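/* A sketch of the generated code, assuming a single copy and no SLP:
   the scalar statement

     x = a < b ? c : d;

   becomes

     vec_compare = va < vb;   (computed in VEC_CMP_TYPE)
     vx = VEC_COND_EXPR <vec_compare, vc, vd>;

   where va, vb, vc and vd are the vector defs of the four operands.  */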
5402
4bbe8262 5403bool
ebfd146a 5404vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5405 gimple *vec_stmt, tree reduc_def, int reduc_index,
5406 slp_tree slp_node)
5407{
5408 tree scalar_dest = NULL_TREE;
5409 tree vec_dest = NULL_TREE;
5410 tree cond_expr, then_clause, else_clause;
5411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5412 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5413 tree comp_vectype = NULL_TREE;
5414 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5415 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5416 tree vec_compare, vec_cond_expr;
5417 tree new_temp;
5418 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5419 tree def;
a855b1b1 5420 enum vect_def_type dt, dts[4];
ebfd146a 5421 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5422 int ncopies;
ebfd146a 5423 enum tree_code code;
a855b1b1 5424 stmt_vec_info prev_stmt_info = NULL;
5425 int i, j;
5426 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5427 vec<tree> vec_oprnds0 = vNULL;
5428 vec<tree> vec_oprnds1 = vNULL;
5429 vec<tree> vec_oprnds2 = vNULL;
5430 vec<tree> vec_oprnds3 = vNULL;
74946978 5431 tree vec_cmp_type;
b8698a0f 5432
5433 if (slp_node || PURE_SLP_STMT (stmt_info))
5434 ncopies = 1;
5435 else
5436 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5437
ebfd146a 5438 gcc_assert (ncopies >= 1);
a855b1b1 5439 if (reduc_index && ncopies > 1)
5440 return false; /* FORNOW */
5441
5442 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5443 return false;
5444
5445 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5446 return false;
5447
5448 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5449 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5450 && reduc_def))
5451 return false;
5452
ebfd146a 5453 /* FORNOW: not yet supported. */
b8698a0f 5454 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5455 {
73fbfcad 5456 if (dump_enabled_p ())
78c60e3d 5457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5458 "value used after loop.\n");
5459 return false;
5460 }
5461
5462 /* Is vectorizable conditional operation? */
5463 if (!is_gimple_assign (stmt))
5464 return false;
5465
5466 code = gimple_assign_rhs_code (stmt);
5467
5468 if (code != COND_EXPR)
5469 return false;
5470
5471 cond_expr = gimple_assign_rhs1 (stmt);
5472 then_clause = gimple_assign_rhs2 (stmt);
5473 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5474
5475 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5476 &comp_vectype)
e9e1d143 5477 || !comp_vectype)
5478 return false;
5479
5480 if (TREE_CODE (then_clause) == SSA_NAME)
5481 {
5482 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5483 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5484 &then_def_stmt, &def, &dt))
5485 return false;
5486 }
b8698a0f 5487 else if (TREE_CODE (then_clause) != INTEGER_CST
5488 && TREE_CODE (then_clause) != REAL_CST
5489 && TREE_CODE (then_clause) != FIXED_CST)
5490 return false;
5491
5492 if (TREE_CODE (else_clause) == SSA_NAME)
5493 {
5494 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5495 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5496 &else_def_stmt, &def, &dt))
5497 return false;
5498 }
b8698a0f 5499 else if (TREE_CODE (else_clause) != INTEGER_CST
5500 && TREE_CODE (else_clause) != REAL_CST
5501 && TREE_CODE (else_clause) != FIXED_CST)
5502 return false;
5503
5504 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
 5505 /* The result of a vector comparison should be a signed type. */
5506 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5507 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5508 if (vec_cmp_type == NULL_TREE)
5509 return false;
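  /* For instance (assuming a 128-bit vector): if VECTYPE is a vector of
     four 32-bit floats, PREC is 32, CMP_TYPE is a signed 32-bit integer
     type, and VEC_CMP_TYPE is the same-sized vector of four signed ints
     that carries the result of the comparison.  */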
784fb9b3 5510
b8698a0f 5511 if (!vec_stmt)
5512 {
5513 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5514 return expand_vec_cond_expr_p (vectype, comp_vectype);
5515 }
5516
5517 /* Transform. */
5518
5519 if (!slp_node)
5520 {
5521 vec_oprnds0.create (1);
5522 vec_oprnds1.create (1);
5523 vec_oprnds2.create (1);
5524 vec_oprnds3.create (1);
f7e531cf 5525 }
5526
5527 /* Handle def. */
5528 scalar_dest = gimple_assign_lhs (stmt);
5529 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5530
5531 /* Handle cond expr. */
5532 for (j = 0; j < ncopies; j++)
5533 {
f7e531cf 5534 gimple new_stmt = NULL;
5535 if (j == 0)
5536 {
5537 if (slp_node)
5538 {
5539 stack_vec<tree, 4> ops;
5540 stack_vec<vec<tree>, 4> vec_defs;
9771b263 5541
5542 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5543 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5544 ops.safe_push (then_clause);
5545 ops.safe_push (else_clause);
f7e531cf 5546 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5547 vec_oprnds3 = vec_defs.pop ();
5548 vec_oprnds2 = vec_defs.pop ();
5549 vec_oprnds1 = vec_defs.pop ();
5550 vec_oprnds0 = vec_defs.pop ();
f7e531cf 5551
5552 ops.release ();
5553 vec_defs.release ();
5554 }
5555 else
5556 {
5557 gimple gtemp;
5558 vec_cond_lhs =
5559 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5560 stmt, NULL);
5561 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5562 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5563
5564 vec_cond_rhs =
5565 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5566 stmt, NULL);
5567 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5568 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5569 if (reduc_index == 1)
5570 vec_then_clause = reduc_def;
5571 else
5572 {
5573 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5574 stmt, NULL);
24ee1384 5575 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5576 NULL, &gtemp, &def, &dts[2]);
5577 }
5578 if (reduc_index == 2)
5579 vec_else_clause = reduc_def;
5580 else
5581 {
5582 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5583 stmt, NULL);
24ee1384 5584 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5585 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5586 }
5587 }
5588 }
5589 else
5590 {
f7e531cf 5591 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5592 vec_oprnds0.pop ());
f7e531cf 5593 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5594 vec_oprnds1.pop ());
a855b1b1 5595 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5596 vec_oprnds2.pop ());
a855b1b1 5597 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5598 vec_oprnds3.pop ());
5599 }
5600
5601 if (!slp_node)
5602 {
5603 vec_oprnds0.quick_push (vec_cond_lhs);
5604 vec_oprnds1.quick_push (vec_cond_rhs);
5605 vec_oprnds2.quick_push (vec_then_clause);
5606 vec_oprnds3.quick_push (vec_else_clause);
5607 }
5608
9dc3f7de 5609 /* Arguments are ready. Create the new vector stmt. */
9771b263 5610 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5611 {
5612 vec_cond_rhs = vec_oprnds1[i];
5613 vec_then_clause = vec_oprnds2[i];
5614 vec_else_clause = vec_oprnds3[i];
a855b1b1 5615
5616 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5617 vec_cond_lhs, vec_cond_rhs);
5618 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5619 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5620
5621 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5622 new_temp = make_ssa_name (vec_dest, new_stmt);
5623 gimple_assign_set_lhs (new_stmt, new_temp);
5624 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5625 if (slp_node)
9771b263 5626 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5627 }
5628
5629 if (slp_node)
5630 continue;
5631
5632 if (j == 0)
5633 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5634 else
5635 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5636
5637 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5638 }
b8698a0f 5639
5640 vec_oprnds0.release ();
5641 vec_oprnds1.release ();
5642 vec_oprnds2.release ();
5643 vec_oprnds3.release ();
f7e531cf 5644
5645 return true;
5646}
5647
5648
8644a673 5649/* Make sure the statement is vectorizable. */
5650
5651bool
a70d6342 5652vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5653{
8644a673 5654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5655 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5656 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5657 bool ok;
a70d6342 5658 tree scalar_type, vectype;
5659 gimple pattern_stmt;
5660 gimple_seq pattern_def_seq;
ebfd146a 5661
73fbfcad 5662 if (dump_enabled_p ())
ebfd146a 5663 {
5664 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5666 dump_printf (MSG_NOTE, "\n");
8644a673 5667 }
ebfd146a 5668
1825a1f3 5669 if (gimple_has_volatile_ops (stmt))
b8698a0f 5670 {
73fbfcad 5671 if (dump_enabled_p ())
78c60e3d 5672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5673 "not vectorized: stmt has volatile operands\n");
5674
5675 return false;
5676 }
5677
5678 /* Skip stmts that do not need to be vectorized. In loops this is expected
5679 to include:
5680 - the COND_EXPR which is the loop exit condition
5681 - any LABEL_EXPRs in the loop
b8698a0f 5682 - computations that are used only for array indexing or loop control.
8644a673 5683 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5684 instance, therefore, all the statements are relevant.
ebfd146a 5685
d092494c 5686 Pattern statement needs to be analyzed instead of the original statement
83197f37 5687 if the original statement is not relevant. Otherwise, we analyze both
5688 statements. In basic blocks we are called from some SLP instance
 5689 traversal, so don't analyze pattern stmts here; the pattern stmts
 5690 will already be part of an SLP instance. */
5691
5692 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5693 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5694 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5695 {
9d5e7640 5696 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5697 && pattern_stmt
5698 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5699 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5700 {
83197f37 5701 /* Analyze PATTERN_STMT instead of the original stmt. */
5702 stmt = pattern_stmt;
5703 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5704 if (dump_enabled_p ())
9d5e7640 5705 {
5706 dump_printf_loc (MSG_NOTE, vect_location,
5707 "==> examining pattern statement: ");
5708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5709 dump_printf (MSG_NOTE, "\n");
5710 }
5711 }
5712 else
5713 {
73fbfcad 5714 if (dump_enabled_p ())
e645e942 5715 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 5716
5717 return true;
5718 }
8644a673 5719 }
83197f37 5720 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5721 && node == NULL
5722 && pattern_stmt
5723 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5724 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5725 {
5726 /* Analyze PATTERN_STMT too. */
73fbfcad 5727 if (dump_enabled_p ())
83197f37 5728 {
5729 dump_printf_loc (MSG_NOTE, vect_location,
5730 "==> examining pattern statement: ");
5731 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5732 dump_printf (MSG_NOTE, "\n");
5733 }
5734
5735 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5736 return false;
5737 }
ebfd146a 5738
1107f3ae 5739 if (is_pattern_stmt_p (stmt_info)
079c527f 5740 && node == NULL
363477c0 5741 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5742 {
363477c0 5743 gimple_stmt_iterator si;
1107f3ae 5744
5745 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5746 {
5747 gimple pattern_def_stmt = gsi_stmt (si);
5748 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5749 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5750 {
5751 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5752 if (dump_enabled_p ())
363477c0 5753 {
5754 dump_printf_loc (MSG_NOTE, vect_location,
5755 "==> examining pattern def statement: ");
5756 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
e645e942 5757 dump_printf (MSG_NOTE, "\n");
363477c0 5758 }
1107f3ae 5759
5760 if (!vect_analyze_stmt (pattern_def_stmt,
5761 need_to_vectorize, node))
5762 return false;
5763 }
5764 }
5765 }
1107f3ae 5766
5767 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5768 {
5769 case vect_internal_def:
5770 break;
ebfd146a 5771
8644a673 5772 case vect_reduction_def:
7c5222ff 5773 case vect_nested_cycle:
a70d6342 5774 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5775 || relevance == vect_used_in_outer_by_reduction
a70d6342 5776 || relevance == vect_unused_in_scope));
5777 break;
5778
5779 case vect_induction_def:
5780 case vect_constant_def:
5781 case vect_external_def:
5782 case vect_unknown_def_type:
5783 default:
5784 gcc_unreachable ();
5785 }
ebfd146a 5786
5787 if (bb_vinfo)
5788 {
5789 gcc_assert (PURE_SLP_STMT (stmt_info));
5790
b690cc0f 5791 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5792 if (dump_enabled_p ())
a70d6342 5793 {
5794 dump_printf_loc (MSG_NOTE, vect_location,
5795 "get vectype for scalar type: ");
5796 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 5797 dump_printf (MSG_NOTE, "\n");
5798 }
5799
5800 vectype = get_vectype_for_scalar_type (scalar_type);
5801 if (!vectype)
5802 {
73fbfcad 5803 if (dump_enabled_p ())
a70d6342 5804 {
5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5806 "not SLPed: unsupported data-type ");
5807 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5808 scalar_type);
e645e942 5809 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5810 }
5811 return false;
5812 }
5813
73fbfcad 5814 if (dump_enabled_p ())
a70d6342 5815 {
5816 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5817 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 5818 dump_printf (MSG_NOTE, "\n");
5819 }
5820
5821 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5822 }
5823
8644a673 5824 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5825 {
5826 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5827 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5828 *need_to_vectorize = true;
5829 }
5830
8644a673 5831 ok = true;
b8698a0f 5832 if (!bb_vinfo
5833 && (STMT_VINFO_RELEVANT_P (stmt_info)
5834 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5835 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5836 || vectorizable_shift (stmt, NULL, NULL, NULL)
5837 || vectorizable_operation (stmt, NULL, NULL, NULL)
5838 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5839 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5840 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5841 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5842 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5843 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5844 else
5845 {
5846 if (bb_vinfo)
5847 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5848 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5849 || vectorizable_operation (stmt, NULL, NULL, node)
5850 || vectorizable_assignment (stmt, NULL, NULL, node)
5851 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5852 || vectorizable_call (stmt, NULL, NULL, node)
5853 || vectorizable_store (stmt, NULL, NULL, node)
5854 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5855 }
5856
5857 if (!ok)
ebfd146a 5858 {
73fbfcad 5859 if (dump_enabled_p ())
8644a673 5860 {
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "not vectorized: relevant stmt not ");
5863 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5864 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5865 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5866 }
b8698a0f 5867
5868 return false;
5869 }
5870
5871 if (bb_vinfo)
5872 return true;
5873
5874 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5875 need extra handling, except for vectorizable reductions. */
5876 if (STMT_VINFO_LIVE_P (stmt_info)
5877 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5878 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5879
8644a673 5880 if (!ok)
ebfd146a 5881 {
73fbfcad 5882 if (dump_enabled_p ())
8644a673 5883 {
5884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5885 "not vectorized: live stmt not ");
5886 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5887 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5888 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5889 }
b8698a0f 5890
8644a673 5891 return false;
5892 }
5893
5894 return true;
5895}
5896
5897
5898/* Function vect_transform_stmt.
5899
5900 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5901
5902bool
5903vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5904 bool *grouped_store, slp_tree slp_node,
5905 slp_instance slp_node_instance)
5906{
5907 bool is_store = false;
5908 gimple vec_stmt = NULL;
5909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5910 bool done;
5911
5912 switch (STMT_VINFO_TYPE (stmt_info))
5913 {
5914 case type_demotion_vec_info_type:
ebfd146a 5915 case type_promotion_vec_info_type:
5916 case type_conversion_vec_info_type:
5917 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5918 gcc_assert (done);
5919 break;
5920
5921 case induc_vec_info_type:
5922 gcc_assert (!slp_node);
5923 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5924 gcc_assert (done);
5925 break;
5926
5927 case shift_vec_info_type:
5928 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5929 gcc_assert (done);
5930 break;
5931
5932 case op_vec_info_type:
5933 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5934 gcc_assert (done);
5935 break;
5936
5937 case assignment_vec_info_type:
5938 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5939 gcc_assert (done);
5940 break;
5941
5942 case load_vec_info_type:
b8698a0f 5943 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5944 slp_node_instance);
5945 gcc_assert (done);
5946 break;
5947
5948 case store_vec_info_type:
5949 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5950 gcc_assert (done);
0d0293ac 5951 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5952 {
5953 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5954 last store in the chain is reached. Store stmts before the last
 5955 one are skipped, and their vec_stmt_info shouldn't be freed
5956 meanwhile. */
0d0293ac 5957 *grouped_store = true;
5958 if (STMT_VINFO_VEC_STMT (stmt_info))
5959 is_store = true;
5960 }
5961 else
5962 is_store = true;
5963 break;
5964
5965 case condition_vec_info_type:
f7e531cf 5966 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5967 gcc_assert (done);
5968 break;
5969
5970 case call_vec_info_type:
190c2236 5971 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5972 stmt = gsi_stmt (*gsi);
5973 break;
5974
5975 case reduc_vec_info_type:
b5aeb3bb 5976 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5977 gcc_assert (done);
5978 break;
5979
5980 default:
5981 if (!STMT_VINFO_LIVE_P (stmt_info))
5982 {
73fbfcad 5983 if (dump_enabled_p ())
78c60e3d 5984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5985 "stmt not supported.\n");
5986 gcc_unreachable ();
5987 }
5988 }
5989
5990 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5991 is being vectorized, but outside the immediately enclosing loop. */
5992 if (vec_stmt
5993 && STMT_VINFO_LOOP_VINFO (stmt_info)
5994 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5995 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5996 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5997 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5998 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5999 vect_used_in_outer_by_reduction))
ebfd146a 6000 {
6001 struct loop *innerloop = LOOP_VINFO_LOOP (
6002 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
6003 imm_use_iterator imm_iter;
6004 use_operand_p use_p;
6005 tree scalar_dest;
6006 gimple exit_phi;
6007
73fbfcad 6008 if (dump_enabled_p ())
78c60e3d 6009 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6010 "Record the vdef for outer-loop vectorization.\n");
6011
 6012 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6013 (to be used when vectorizing outer-loop stmts that use the DEF of
6014 STMT). */
6015 if (gimple_code (stmt) == GIMPLE_PHI)
6016 scalar_dest = PHI_RESULT (stmt);
6017 else
6018 scalar_dest = gimple_assign_lhs (stmt);
6019
6020 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6021 {
6022 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6023 {
6024 exit_phi = USE_STMT (use_p);
6025 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6026 }
6027 }
6028 }
6029
6030 /* Handle stmts whose DEF is used outside the loop-nest that is
6031 being vectorized. */
6032 if (STMT_VINFO_LIVE_P (stmt_info)
6033 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6034 {
6035 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6036 gcc_assert (done);
6037 }
6038
6039 if (vec_stmt)
83197f37 6040 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 6041
b8698a0f 6042 return is_store;
6043}
6044
6045
b8698a0f 6046/* Remove a group of stores (for SLP or interleaving), free their
6047 stmt_vec_info. */
6048
6049void
6050vect_remove_stores (gimple first_stmt)
6051{
6052 gimple next = first_stmt;
6053 gimple tmp;
6054 gimple_stmt_iterator next_si;
6055
6056 while (next)
6057 {
6058 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6059
6060 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6061 if (is_pattern_stmt_p (stmt_info))
6062 next = STMT_VINFO_RELATED_STMT (stmt_info);
6063 /* Free the attached stmt_vec_info and remove the stmt. */
6064 next_si = gsi_for_stmt (next);
3d3f2249 6065 unlink_stmt_vdef (next);
ebfd146a 6066 gsi_remove (&next_si, true);
3d3f2249 6067 release_defs (next);
6068 free_stmt_vec_info (next);
6069 next = tmp;
6070 }
6071}
6072
6073
6074/* Function new_stmt_vec_info.
6075
6076 Create and initialize a new stmt_vec_info struct for STMT. */
6077
6078stmt_vec_info
b8698a0f 6079new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6080 bb_vec_info bb_vinfo)
6081{
6082 stmt_vec_info res;
6083 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6084
6085 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6086 STMT_VINFO_STMT (res) = stmt;
6087 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 6088 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 6089 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6090 STMT_VINFO_LIVE_P (res) = false;
6091 STMT_VINFO_VECTYPE (res) = NULL;
6092 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 6093 STMT_VINFO_VECTORIZABLE (res) = true;
6094 STMT_VINFO_IN_PATTERN_P (res) = false;
6095 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 6096 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6097 STMT_VINFO_DATA_REF (res) = NULL;
6098
6099 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6100 STMT_VINFO_DR_OFFSET (res) = NULL;
6101 STMT_VINFO_DR_INIT (res) = NULL;
6102 STMT_VINFO_DR_STEP (res) = NULL;
6103 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6104
6105 if (gimple_code (stmt) == GIMPLE_PHI
6106 && is_loop_header_bb_p (gimple_bb (stmt)))
6107 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6108 else
6109 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6110
9771b263 6111 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 6112 STMT_SLP_TYPE (res) = loop_vect;
6113 GROUP_FIRST_ELEMENT (res) = NULL;
6114 GROUP_NEXT_ELEMENT (res) = NULL;
6115 GROUP_SIZE (res) = 0;
6116 GROUP_STORE_COUNT (res) = 0;
6117 GROUP_GAP (res) = 0;
6118 GROUP_SAME_DR_STMT (res) = NULL;
6119
6120 return res;
6121}
6122
6123
6124/* Create a hash table for stmt_vec_info. */
6125
6126void
6127init_stmt_vec_info_vec (void)
6128{
6129 gcc_assert (!stmt_vec_info_vec.exists ());
6130 stmt_vec_info_vec.create (50);
6131}
6132
6133
6134/* Free hash table for stmt_vec_info. */
6135
6136void
6137free_stmt_vec_info_vec (void)
6138{
6139 unsigned int i;
6140 vec_void_p info;
6141 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6142 if (info != NULL)
6143 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6144 gcc_assert (stmt_vec_info_vec.exists ());
6145 stmt_vec_info_vec.release ();
6146}
6147
6148
6149/* Free stmt vectorization related info. */
6150
6151void
6152free_stmt_vec_info (gimple stmt)
6153{
6154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6155
6156 if (!stmt_info)
6157 return;
6158
6159 /* Check if this statement has a related "pattern stmt"
6160 (introduced by the vectorizer during the pattern recognition
6161 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6162 too. */
6163 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6164 {
6165 stmt_vec_info patt_info
6166 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6167 if (patt_info)
6168 {
6169 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6170 if (seq)
6171 {
6172 gimple_stmt_iterator si;
6173 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6174 free_stmt_vec_info (gsi_stmt (si));
6175 }
6176 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6177 }
6178 }
6179
9771b263 6180 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6181 set_vinfo_for_stmt (stmt, NULL);
6182 free (stmt_info);
6183}
6184
6185
bb67d9c7 6186/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6187
bb67d9c7 6188 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6189 by the target. */
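/* For example (a sketch): called for a 2-byte short element with
   SIZE == 16 this requests a 16-byte, eight-element vector of shorts;
   with SIZE == 0 the target's preferred SIMD mode for the element type
   is used instead.  */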
6190
6191static tree
6192get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6193{
6194 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6195 enum machine_mode simd_mode;
2f816591 6196 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6197 int nunits;
6198 tree vectype;
6199
cc4b5170 6200 if (nbytes == 0)
6201 return NULL_TREE;
6202
6203 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6204 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6205 return NULL_TREE;
6206
6207 /* For vector types of elements whose mode precision doesn't
 6208 match their type's precision we use an element type of mode
6209 precision. The vectorization routines will have to make sure
6210 they support the proper result truncation/extension.
6211 We also make sure to build vector types with INTEGER_TYPE
6212 component type only. */
6d7971b8 6213 if (INTEGRAL_TYPE_P (scalar_type)
6214 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6215 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6216 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6217 TYPE_UNSIGNED (scalar_type));
6d7971b8 6218
6219 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6220 When the component mode passes the above test simply use a type
6221 corresponding to that mode. The theory is that any use that
6222 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 6223 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 6224 && !INTEGRAL_TYPE_P (scalar_type))
6225 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6226
6227 /* We can't build a vector type of elements with alignment bigger than
6228 their size. */
dfc2e2ac 6229 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6230 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6231 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 6232
 6233 /* If we fell back to using the mode, fail if there was
6234 no scalar type for it. */
6235 if (scalar_type == NULL_TREE)
6236 return NULL_TREE;
6237
6238 /* If no size was supplied use the mode the target prefers. Otherwise
6239 lookup a vector mode of the specified size. */
6240 if (size == 0)
6241 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6242 else
6243 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6244 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6245 if (nunits <= 1)
6246 return NULL_TREE;
6247
6248 vectype = build_vector_type (scalar_type, nunits);
6249
6250 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6251 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 6252 return NULL_TREE;
6253
6254 return vectype;
6255}
6256
6257unsigned int current_vector_size;
6258
6259/* Function get_vectype_for_scalar_type.
6260
6261 Returns the vector type corresponding to SCALAR_TYPE as supported
6262 by the target. */
6263
6264tree
6265get_vectype_for_scalar_type (tree scalar_type)
6266{
6267 tree vectype;
6268 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6269 current_vector_size);
6270 if (vectype
6271 && current_vector_size == 0)
6272 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6273 return vectype;
6274}
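/* Usage sketch: on a target with 128-bit vectors, the first successful
   call, say for a 4-byte int, returns a four-element vector type and
   latches current_vector_size to 16; subsequent calls then request
   vector types of that same size, so all vectypes chosen for one
   region agree.  */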
6275
6276/* Function get_same_sized_vectype
6277
6278 Returns a vector type corresponding to SCALAR_TYPE of size
6279 VECTOR_TYPE if supported by the target. */
6280
6281tree
bb67d9c7 6282get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6283{
6284 return get_vectype_for_scalar_type_and_size
6285 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6286}
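/* For example (a sketch): given a 16-byte vector of ints as VECTOR_TYPE
   and short as SCALAR_TYPE, this yields the 16-byte vector of eight
   shorts, which is how mixed-width operations keep their operand sizes
   compatible.  */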
6287
6288/* Function vect_is_simple_use.
6289
6290 Input:
6291 LOOP_VINFO - the vect info of the loop that is being vectorized.
6292 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6293 OPERAND - operand of STMT in the loop or bb.
6294 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6295
6296 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6297 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6298 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6299 operands are those that are defined by a previous iteration of the loop (as
6300 is the case in reduction/induction computations).
6301 For basic blocks, supportable operands are constants and bb invariants.
6302 For now, operands defined outside the basic block are not supported. */
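/* A few illustrative classifications (a sketch): the constant 5 yields
   vect_constant_def; a function argument or a name defined before the
   loop yields vect_external_def; an SSA name defined by a statement
   inside the loop gets whatever STMT_VINFO_DEF_TYPE records for that
   statement, e.g. vect_internal_def or vect_reduction_def.  */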
6303
6304bool
24ee1384 6305vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6306 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6307 tree *def, enum vect_def_type *dt)
b8698a0f 6308{
6309 basic_block bb;
6310 stmt_vec_info stmt_vinfo;
a70d6342 6311 struct loop *loop = NULL;
b8698a0f 6312
6313 if (loop_vinfo)
6314 loop = LOOP_VINFO_LOOP (loop_vinfo);
6315
6316 *def_stmt = NULL;
6317 *def = NULL_TREE;
b8698a0f 6318
73fbfcad 6319 if (dump_enabled_p ())
ebfd146a 6320 {
6321 dump_printf_loc (MSG_NOTE, vect_location,
6322 "vect_is_simple_use: operand ");
6323 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 6324 dump_printf (MSG_NOTE, "\n");
ebfd146a 6325 }
b8698a0f 6326
b758f602 6327 if (CONSTANT_CLASS_P (operand))
6328 {
6329 *dt = vect_constant_def;
6330 return true;
6331 }
b8698a0f 6332
6333 if (is_gimple_min_invariant (operand))
6334 {
6335 *def = operand;
8644a673 6336 *dt = vect_external_def;
6337 return true;
6338 }
6339
6340 if (TREE_CODE (operand) == PAREN_EXPR)
6341 {
73fbfcad 6342 if (dump_enabled_p ())
e645e942 6343 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
6344 operand = TREE_OPERAND (operand, 0);
6345 }
b8698a0f 6346
6347 if (TREE_CODE (operand) != SSA_NAME)
6348 {
73fbfcad 6349 if (dump_enabled_p ())
78c60e3d 6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6351 "not ssa-name.\n");
6352 return false;
6353 }
b8698a0f 6354
6355 *def_stmt = SSA_NAME_DEF_STMT (operand);
6356 if (*def_stmt == NULL)
6357 {
73fbfcad 6358 if (dump_enabled_p ())
78c60e3d 6359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6360 "no def_stmt.\n");
6361 return false;
6362 }
6363
73fbfcad 6364 if (dump_enabled_p ())
ebfd146a 6365 {
6366 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6367 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
e645e942 6368 dump_printf (MSG_NOTE, "\n");
6369 }
6370
8644a673 6371 /* Empty stmt is expected only in case of a function argument.
6372 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6373 if (gimple_nop_p (*def_stmt))
6374 {
6375 *def = operand;
8644a673 6376 *dt = vect_external_def;
6377 return true;
6378 }
6379
6380 bb = gimple_bb (*def_stmt);
6381
6382 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6383 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6384 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6385 *dt = vect_external_def;
6386 else
6387 {
6388 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6389 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6390 }
6391
6392 if (*dt == vect_unknown_def_type
6393 || (stmt
6394 && *dt == vect_double_reduction_def
6395 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6396 {
73fbfcad 6397 if (dump_enabled_p ())
78c60e3d 6398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6399 "Unsupported pattern.\n");
6400 return false;
6401 }
6402
73fbfcad 6403 if (dump_enabled_p ())
e645e942 6404 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
ebfd146a
IR
6405
6406 switch (gimple_code (*def_stmt))
6407 {
6408 case GIMPLE_PHI:
6409 *def = gimple_phi_result (*def_stmt);
6410 break;
6411
6412 case GIMPLE_ASSIGN:
6413 *def = gimple_assign_lhs (*def_stmt);
6414 break;
6415
6416 case GIMPLE_CALL:
6417 *def = gimple_call_lhs (*def_stmt);
6418 if (*def != NULL)
6419 break;
6420 /* FALLTHRU */
6421 default:
73fbfcad 6422 if (dump_enabled_p ())
78c60e3d 6423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6424 "unsupported defining stmt:\n");
ebfd146a
IR
6425 return false;
6426 }
6427
6428 return true;
6429}
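
/* A minimal usage sketch (an illustrative, hypothetical helper, not
   part of the vectorizer proper): it shows the typical calling
   convention of vect_is_simple_use in a loop context, where BB_VINFO
   is NULL and the output parameters classify the operand's
   definition.  */

static bool ATTRIBUTE_UNUSED
example_operand_is_vectorizable (tree operand, gimple use_stmt,
                                 loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  /* Reject operands whose definitions the vectorizer cannot handle.  */
  if (!vect_is_simple_use (operand, use_stmt, loop_vinfo, NULL,
                           &def_stmt, &def, &dt))
    return false;

  /* Constants and loop invariants need no vectorized definition;
     anything else must have an in-loop defining statement.  */
  return (dt == vect_constant_def
          || dt == vect_external_def
          || def_stmt != NULL);
}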

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
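
/* A minimal usage sketch (hypothetical helper, not used elsewhere in
   this file): it demonstrates the NULL_TREE contract of
   vect_is_simple_use_1 documented above.  FALLBACK_VECTYPE stands in
   for whatever vector type the caller would compute for
   constant/external operands.  */

static tree ATTRIBUTE_UNUSED
example_operand_vectype (tree operand, gimple use_stmt,
                         loop_vec_info loop_vinfo, tree fallback_vectype)
{
  gimple def_stmt;
  tree def, vectype;
  enum vect_def_type dt;

  if (!vect_is_simple_use_1 (operand, use_stmt, loop_vinfo, NULL,
                             &def_stmt, &def, &dt, &vectype))
    return NULL_TREE;

  /* Internal defs carry their vector type; for constant/external defs
     the function leaves *VECTYPE as NULL_TREE and the caller decides.  */
  return vectype != NULL_TREE ? vectype : fallback_vectype;
}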

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
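
/* A usage sketch (hypothetical helper, assuming STMT already has a
   stmt_vec_info): it queries a char->int widening conversion.  If the
   target lacks a direct single-step unpack to the int vector type, the
   multi-step path above may still succeed, reporting multi_step_cvt == 1
   with the short vector type recorded in INTERM_TYPES.  */

static void ATTRIBUTE_UNUSED
example_query_widening (gimple stmt, tree char_vectype, tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
                                      char_vectype, &code1, &code2,
                                      &multi_step_cvt, &interm_types))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "widening supported, %d intermediate step(s).\n",
                         multi_step_cvt);
      /* On success the caller owns INTERM_TYPES.  */
      interm_types.release ();
    }
}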

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (interm_optab, vec_mode))
             != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
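
/* The narrowing counterpart of the sketch above (hypothetical helper):
   for an int->char conversion a single CODE1 (normally
   VEC_PACK_TRUNC_EXPR) is returned, because each narrowing step packs
   two input vectors into one result vector; int->short->char would be
   reported with multi_step_cvt == 1 and the short vector type in
   INTERM_TYPES.  */

static void ATTRIBUTE_UNUSED
example_query_narrowing (tree int_vectype, tree char_vectype)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
                                       &code1, &multi_step_cvt,
                                       &interm_types))
    /* On success the caller owns INTERM_TYPES.  */
    interm_types.release ();
}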