/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
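
/* For example, costing NCOPIES vector stores into the body cost vector
   looks much like the calls in vect_get_store_cost further below (a
   usage sketch, with hypothetical local variables):

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_store,
                                      stmt_info, 0, vect_body);  */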

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside any pattern; if LHS has other uses that
             are pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT, in the loop that is represented by LOOP_VINFO, is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
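
/* For instance, a store "a[i] = x" alters memory and is marked
   vect_used_in_scope above, while a value computed in the loop and read
   only after it (through a loop-closed exit phi) just sets *LIVE_P.  */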

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
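
/* E.g., given "a[i_5] = x_3", the use of x_3 is a non-indexing operand
   (the function returns true for it), while i_5 only feeds the array
   index and yields false.  */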

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process worklist.  */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
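/* E.g., widening a vector of chars to a vector of ints is a two-step
   promotion (char -> short -> int), so PWR is 1 and the loop below adds
   vect_pow2 (1) + vect_pow2 (2) = 6 vec_promote_demote stmts (an
   illustrative reading of the formula).  */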

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
            (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For a grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
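
/* E.g., for a group of four stores, the call on the group's first stmt
   returns 4 and the calls on the remaining three return 1, so the
   group-size overhead is attributed only once.  */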

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
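      /* E.g., for GROUP_SIZE == 4 that is exact_log2 (4) == 2 interleave
         stages of GROUP_SIZE statements each, per copy (an illustrative
         reading of the count below).  */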
c3e7ee41
BS
951
952 int nstmts = ncopies * exact_log2 (group_size) * group_size;
92345349
BS
953 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
ebfd146a 955
73fbfcad 956 if (dump_enabled_p ())
78c60e3d 957 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 958 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 959 group_size);
ebfd146a
IR
960 }
961
962 /* Costs of the stores. */
92345349 963 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
ebfd146a 964
73fbfcad 965 if (dump_enabled_p ())
78c60e3d
SS
966 dump_printf_loc (MSG_NOTE, vect_location,
967 "vect_model_store_cost: inside_cost = %d, "
e645e942 968 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
969}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operation for each needed permute.  */
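      /* As for stores: exact_log2 (GROUP_SIZE) extract stages of
         GROUP_SIZE statements each, per copy; e.g. GROUP_SIZE == 2 needs
         one stage of two extracts (an illustrative reading).  */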
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
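
/* E.g., vectorizing "x_1 + 3" with a four-element integer vectype leads
   to a call with VAL = 3, emitting a preheader stmt along the lines of
   "cst_2 = { 3, 3, 3, 3 };" and returning cst_2 (an illustrative sketch
   of the constant-operand case).  */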

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT
   field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
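
/* A typical caller alternates between the two helpers above when emitting
   NCOPIES copies of a vectorized stmt; a minimal sketch, mirroring the loop
   in vectorizable_assignment further below:

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
         ...
       }  */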


/* Function vect_finish_stmt_generation.

   Insert the new stmt VEC_STMT before GSI, create a stmt_vec_info for it,
   and copy the source location from the scalar stmt STMT.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
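
/* Note: every vectorizable_* routine below funnels its generated vector
   stmts through vect_finish_stmt_generation, so each new stmt gets a
   stmt_vec_info and shows up in the vectorizer dumps.  */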

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
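
/* As an illustration (the mapping is entirely target-defined, so this is
   only an assumed example): when vectorizing a call to a sqrt builtin with
   a two-element double vector type, a target's hook would typically hand
   back the decl of its own two-lane vector sqrt builtin if it has one, and
   NULL_TREE otherwise, which makes the caller reject the call.  */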

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments,
     except for fma.  A call with no arguments is not handled either.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_call ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

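  /* Beyond this point VEC_STMT is non-NULL: the analysis above already
     succeeded in an earlier pass (recording call_vec_info_type and the
     cost), so the code below runs only once the vectorizer has committed
     to transforming the loop or basic block.  */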
  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest, NULL);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

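    /* NARROW: the result elements are half the width of the input elements,
       so one result vector holds the elements of two input vectors
       (nunits_out == 2 * nunits_in).  Each vectorized call therefore
       receives two vector defs per scalar argument, which is why VARGS is
       sized NARGS * 2 below.  */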
    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2 * i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in DCE.
     We however cannot remove it here, due to the way the SSA name
     it defines is mapped to the new definition.  So just replace the
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support.  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support.  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get the first vector operand.  All the vector operands except the very
     first one (that is the scalar oprnd) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get the second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
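
/* Each invocation above pushes two vector defs and recurses while
   MULTI_STEP_CVT counts down, so a call with MULTI_STEP_CVT == N collects
   2 * (N + 1) defs in VEC_OPRNDS; the NARROW path of vectorizable_conversion
   below passes vect_pow2 (multi_step_cvt) - 1 to gather enough operands for
   the whole demotion chain.  */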


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
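
/* Each level of a multi-step demotion halves the operand count: the pair
   (i, i+1) is packed into a single vector stored back at index i/2, the
   operand vector is truncated to the surviving half, and the recursion
   continues with VEC_PACK_TRUNC_EXPR until the final destination type is
   reached.  */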


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
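
/* Promotion is the mirror image of the demotion above: every input vector
   yields a low/high pair of wider vectors, so VEC_OPRNDS0 comes back twice
   as long after each step.  */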


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

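      /* A FLOAT_EXPR that widens (a narrow integer converted to a wider
         float) is handled as an integer widening (NOP_EXPR) to an
         intermediate integer type, followed by the int->float conversion:
         try each 2x-wider integer mode up to the float size until both
         halves are supportable.  */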
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (received from
     supportable_*_operation), and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

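    /* The promotion steps in the WIDEN case walk VEC_DSTS from index
       MULTI_STEP_CVT down to 0; VEC_DSTS[0] is the last target, which is
       CVT_TYPE when a final conversion step (CODECVT1) still follows, and
       VECTYPE_OUT otherwise.  */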
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

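    /* For a multi-step narrowing conversion (e.g. FIX_TRUNC_EXPR to a
       narrower integer), each operand vector is first converted to CVT_TYPE
       with CODECVT1, and the results are then packed down to the destination
       type by the demotion helper.  */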
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is this a vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
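
/* The fallback order above mirrors vectorizable_shift below: a shift is
   supportable if the target implements it either with a scalar shift
   amount (optab_scalar) or, failing that, with a vector shift amount
   (optab_vector).  */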
3088
3089
9dc3f7de
IR
3090/* Function vectorizable_shift.
3091
3092 Check if STMT performs a shift operation that can be vectorized.
3093 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3094 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3095 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3096
3097static bool
3098vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3099 gimple *vec_stmt, slp_tree slp_node)
3100{
3101 tree vec_dest;
3102 tree scalar_dest;
3103 tree op0, op1 = NULL;
3104 tree vec_oprnd1 = NULL_TREE;
3105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3106 tree vectype;
3107 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3108 enum tree_code code;
3109 enum machine_mode vec_mode;
3110 tree new_temp;
3111 optab optab;
3112 int icode;
3113 enum machine_mode optab_op2_mode;
3114 tree def;
3115 gimple def_stmt;
3116 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3117 gimple new_stmt = NULL;
3118 stmt_vec_info prev_stmt_info;
3119 int nunits_in;
3120 int nunits_out;
3121 tree vectype_out;
cede2577 3122 tree op1_vectype;
9dc3f7de
IR
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			     &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same for
	 all stmts; in loops, if it is a constant or invariant, it is
	 always a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }
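
  /* Illustrative example (editor's note, not part of the original source):
     in a loop like
	 for (i = 0; i < n; i++)  a[i] = b[i] << 3;
     the shift amount is invariant, so the vector/scalar shift optab
     applies, whereas
	 for (i = 0; i < n; i++)  a[i] = b[i] << c[i];
     needs the vector/vector optab, since every element is shifted by its
     own count.  */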

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }
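
  /* Illustrative example (editor's note, not part of the original source):
     for a V2DImode shift "x << 3" handled by the vector/vector optab, the
     constant 3 has type int while the vector elements are long long, so
     the code above folds the constant to the element type and, at
     transform time, broadcasts it into a vector with vect_init_vector
     before the shift is emitted.  */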

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
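
  /* Illustrative example (editor's note, not part of the original source):
     a pointer increment such as "q = p + 4" is represented as
     POINTER_PLUS_EXPR in GIMPLE; at the vector level it is vectorized as
     an ordinary element-wise PLUS_EXPR, since there is no pointer-plus
     operation on vectors.  */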

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
	icode = LAST_INSN_CODE;
      else
	icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  if (op_type == ternary_op)
	    {
	      vec_oprnds2.create (1);
	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
								    stmt,
								    NULL));
	    }
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
						   vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *) dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *) dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *) dr->aux)->base_misaligned = false;
    }
}
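
/* Illustrative example (editor's note, not part of the original source):
   if a loop accesses a file-scope array such as "int a[256];" whose
   declared alignment is below the vector alignment, the analysis phase
   marks the base as misaligned, and ensure_base_align raises DECL_ALIGN
   of "a" to TYPE_ALIGN (vectype) so that aligned vector loads and stores
   can be generated.  */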

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
			    ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
			    size_zero_node) < 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step for store.\n");
      return false;
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  if (vect_store_lanes_supported (vectype, group_size))
	    store_lanes_p = true;
	  else if (!vect_grouped_store_supported (vectype, group_size))
	    return false;
	}

      if (first_stmt == stmt)
	{
	  /* STMT is the leader of the group.  Check the operands of all the
	     stmts of the group.  */
	  next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
	  while (next_stmt)
	    {
	      gcc_assert (gimple_assign_single_p (next_stmt));
	      op = gimple_assign_rhs1 (next_stmt);
	      if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
				       &def_stmt, &def, &dt))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "use not simple.\n");
		  return false;
		}
	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	    }
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
			     NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
	  && !slp)
	{
	  *vec_stmt = NULL;
	  return true;
	}

      if (slp)
	{
	  grouped_store = false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
	  op = gimple_assign_rhs1 (first_stmt);
	}
      else
	/* VEC_NUM is the number of vect stmts to be created for this
	   group.  */
	vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform store.  ncopies = %d\n", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from the base address (the access
     of the first stmt in the chain, S2 in the above example) when the last
     store stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, the above vector stores
     and permutation stmts are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
	{
	  if (slp)
	    {
	      /* Get vectorized arguments for SLP_NODE.  */
	      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
				 NULL, slp_node, -1);

	      vec_oprnd = vec_oprnds[0];
	    }
	  else
	    {
	      /* For interleaved stores we collect vectorized defs for all the
		 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
		 used as an input to vect_permute_store_chain(), and OPRNDS as
		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
		 OPRNDS are of size 1.  */
	      next_stmt = first_stmt;
	      for (i = 0; i < group_size; i++)
		{
		  /* Since gaps are not supported for interleaved stores,
		     GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
		     there is no interleaving, GROUP_SIZE is 1, and only one
		     iteration of the loop will be executed.  */
		  gcc_assert (next_stmt
			      && gimple_assign_single_p (next_stmt));
		  op = gimple_assign_rhs1 (next_stmt);

		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
							    NULL);
		  dr_chain.quick_push (vec_oprnd);
		  oprnds.quick_push (vec_oprnd);
		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
		}
	    }

	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr))))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
					  simd_lane_access_p ? loop : NULL,
					  NULL_TREE, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p);
	  gcc_assert (bb_vinfo || !inv_p);
	}
      else
	{
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
	     next copy.
	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  for (i = 0; i < group_size; i++)
	    {
	      op = oprnds[i];
	      vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
				  &def, &dt);
	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
	      dr_chain[i] = vec_oprnd;
	      oprnds[i] = vec_oprnd;
	    }
	  if (dataref_offset)
	    dataref_offset
	      = int_const_binop (PLUS_EXPR, dataref_offset,
				 TYPE_SIZE_UNIT (aggr_type));
	  else
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					   TYPE_SIZE_UNIT (aggr_type));
	}

      if (store_lanes_p)
	{
	  tree vec_array;

	  /* Combine all the vectors into an array.  */
	  vec_array = create_vector_array (vectype, vec_num);
	  for (i = 0; i < vec_num; i++)
	    {
	      vec_oprnd = dr_chain[i];
	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
	    }

	  /* Emit:
	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
	  gimple_call_set_lhs (new_stmt, data_ref);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	}
      else
	{
	  new_stmt = NULL;
	  if (grouped_store)
	    {
	      if (j == 0)
		result_chain.create (group_size);
	      /* Permute.  */
	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
					&result_chain);
	    }

	  next_stmt = first_stmt;
	  for (i = 0; i < vec_num; i++)
	    {
	      unsigned align, misalign;

	      if (i > 0)
		/* Bump the vector pointer.  */
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      if (slp)
		vec_oprnd = vec_oprnds[i];
	      else if (grouped_store)
		/* For grouped stores vectorized defs are interleaved in
		   vect_permute_store_chain().  */
		vec_oprnd = result_chain[i];

	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
				 dataref_offset
				 ? dataref_offset
				 : build_int_cst (reference_alias_ptr_type
						  (DR_REF (first_dr)), 0));
	      align = TYPE_ALIGN_UNIT (vectype);
	      if (aligned_access_p (first_dr))
		misalign = 0;
	      else if (DR_MISALIGNMENT (first_dr) == -1)
		{
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  align = TYPE_ALIGN_UNIT (elem_type);
		  misalign = 0;
		}
	      else
		{
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  misalign = DR_MISALIGNMENT (first_dr);
		}
	      if (dataref_offset == NULL_TREE)
		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
					misalign);

	      /* Arguments are ready.  Create the new vector stmt.  */
	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if (slp)
		continue;

	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	      if (!next_stmt)
		break;
	    }
	}
      if (!slp)
	{
	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
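
/* Illustrative example (editor's note, not part of the original source):
   for a 4-element vector type, sel = {3, 2, 1, 0} yields the VECTOR_CST
   mask {3, 2, 1, 0}, which reverses the vector when used as the third
   operand of a VEC_PERM_EXPR, provided can_vec_perm_p reports that the
   target supports the permutation.  */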

/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
					    x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
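
/* Illustrative example (editor's note, not part of the original source):
   with X = {a, b, c, d}, Y = {e, f, g, h} and MASK_VEC = {0, 5, 2, 7},
   the generated VEC_PERM_EXPR indexes into the concatenation X|Y and
   produces {a, f, c, h}.  */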

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  if (vect_load_lanes_supported (vectype, group_size))
	    load_lanes_p = true;
	  else if (!vect_grouped_load_supported (vectype, group_size))
	    return false;
	}
    }


  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
				       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
				 &def_stmt, &def, &gather_dt,
				 &gather_off_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "gather index use not simple.\n");
	  return false;
	}
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
				       ? STMT_VINFO_DR_STEP (stmt_info)
				       : DR_STEP (dr),
				       size_zero_node) < 0;
      if (negative && ncopies > 1)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "multiple types with negative step.\n");
	  return false;
	}

      if (negative)
	{
	  if (grouped_load)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "negative step for group load not"
				 " supported\n");
	      return false;
	    }
	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
	  if (alignment_support_scheme != dr_aligned
	      && alignment_support_scheme != dr_unaligned_supported)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "negative step but alignment required.\n");
	      return false;
	    }
	  if (!perm_mask_for_reverse (vectype))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "negative step and reversing not"
				 " supported.\n");
	      return false;
	    }
	}
    }
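
  /* Illustrative example (editor's note, not part of the original source):
     a loop such as
	 for (i = n - 1; i >= 0; i--)  sum += a[i];
     reads with a negative step; each vector is loaded from the lowest
     address it covers and then reversed with the mask from
     perm_mask_for_reverse, which is why the checks above insist that the
     reversing permutation be supported.  */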

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform load.  ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
	modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
	  modifier = WIDEN;

	  for (i = 0; i < gather_off_nunits; ++i)
	    sel[i] = i | nunits;

	  perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
	  gcc_assert (perm_mask != NULL_TREE);
	}
      else if (nunits == gather_off_nunits * 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
	  modifier = NARROW;

	  for (i = 0; i < nunits; ++i)
	    sel[i] = i < gather_off_nunits
		     ? i : i + nunits - gather_off_nunits;

	  perm_mask = vect_gen_perm_mask (vectype, sel);
	  gcc_assert (perm_mask != NULL_TREE);
	  ncopies *= 2;
	}
      else
	gcc_unreachable ();
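
      /* Illustrative example (editor's note, not part of the original
	 source): with nunits = 4 and gather_off_nunits = 8, the WIDEN
	 case builds sel = {4, 5, 6, 7, 4, 5, 6, 7} to extract the upper
	 half of the offset vector for the odd-numbered copies; with
	 nunits = 8 and gather_off_nunits = 4, each gather fills only half
	 a vector, and the NARROW case builds
	 sel = {0, 1, 2, 3, 8, 9, 10, 11} to merge the halves of two
	 gather results into one vector.  */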
4644 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4645 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4646 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4647 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4648 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4649 scaletype = TREE_VALUE (arglist);
4650 gcc_checking_assert (types_compatible_p (srctype, rettype)
4651 && types_compatible_p (srctype, masktype));
4652
4653 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4654
4655 ptr = fold_convert (ptrtype, gather_base);
4656 if (!is_gimple_min_invariant (ptr))
4657 {
4658 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4659 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4660 gcc_assert (!new_bb);
4661 }
4662
4663 /* Currently we support only unconditional gather loads,
4664 so mask should be all ones. */
4665 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4666 mask = build_int_cst (TREE_TYPE (masktype), -1);
4667 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4668 {
4669 REAL_VALUE_TYPE r;
4670 long tmp[6];
4671 for (j = 0; j < 6; ++j)
4672 tmp[j] = -1;
4673 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4674 mask = build_real (TREE_TYPE (masktype), r);
4675 }
4676 else
4677 gcc_unreachable ();
4678 mask = build_vector_from_val (masktype, mask);
4679 mask = vect_init_vector (stmt, mask, masktype, NULL);
4680
4681 scale = build_int_cst (scaletype, gather_scale);
4682
4683 prev_stmt_info = NULL;
4684 for (j = 0; j < ncopies; ++j)
4685 {
4686 if (modifier == WIDEN && (j & 1))
4687 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4688 perm_mask, stmt, gsi);
4689 else if (j == 0)
4690 op = vec_oprnd0
4691 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4692 else
4693 op = vec_oprnd0
4694 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4695
4696 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4697 {
4698 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4699 == TYPE_VECTOR_SUBPARTS (idxtype));
4700 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
4701 var = make_ssa_name (var, NULL);
4702 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4703 new_stmt
4704 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4705 op, NULL_TREE);
4706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4707 op = var;
4708 }
4709
4710 new_stmt
4711 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4712
4713 if (!useless_type_conversion_p (vectype, rettype))
4714 {
4715 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4716 == TYPE_VECTOR_SUBPARTS (rettype));
4717 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
4718 op = make_ssa_name (var, new_stmt);
4719 gimple_call_set_lhs (new_stmt, op);
4720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4721 var = make_ssa_name (vec_dest, NULL);
4722 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4723 new_stmt
4724 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4725 NULL_TREE);
4726 }
4727 else
4728 {
4729 var = make_ssa_name (vec_dest, new_stmt);
4730 gimple_call_set_lhs (new_stmt, var);
4731 }
4732
4733 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4734
4735 if (modifier == NARROW)
4736 {
4737 if ((j & 1) == 0)
4738 {
4739 prev_res = var;
4740 continue;
4741 }
4742 var = permute_vec_elements (prev_res, var,
4743 perm_mask, stmt, gsi);
4744 new_stmt = SSA_NAME_DEF_STMT (var);
4745 }
4746
4747 if (prev_stmt_info == NULL)
4748 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4749 else
4750 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4751 prev_stmt_info = vinfo_for_stmt (new_stmt);
4752 }
4753 return true;
4754 }
7d75abc8
MM
4755 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4756 {
4757 gimple_stmt_iterator incr_gsi;
4758 bool insert_after;
4759 gimple incr;
4760 tree offvar;
7d75abc8
MM
4761 tree ivstep;
4762 tree running_off;
9771b263 4763 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 4764 gimple_seq stmts = NULL;
14ac6aa2
RB
4765 tree stride_base, stride_step, alias_off;
4766
4767 gcc_assert (!nested_in_vect_loop);
7d75abc8 4768
14ac6aa2
RB
4769 stride_base
4770 = fold_build_pointer_plus
4771 (unshare_expr (DR_BASE_ADDRESS (dr)),
4772 size_binop (PLUS_EXPR,
4773 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 4774 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 4775 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
4776
4777 /* For a load with loop-invariant (but other than power-of-2)
4778 stride (i.e. not a grouped access) like so:
4779
4780 for (i = 0; i < n; i += stride)
4781 ... = array[i];
4782
4783 we generate a new induction variable and new accesses to
4784 form a new vector (or vectors, depending on ncopies):
4785
4786 for (j = 0; ; j += VF*stride)
4787 tmp1 = array[j];
4788 tmp2 = array[j + stride];
4789 ...
4790 vectemp = {tmp1, tmp2, ...}
4791 */
4792
4793 ivstep = stride_step;
4794 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4795 build_int_cst (TREE_TYPE (ivstep), vf));
4796
4797 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4798
4799 create_iv (stride_base, ivstep, NULL,
4800 loop, &incr_gsi, insert_after,
4801 &offvar, NULL);
4802 incr = gsi_stmt (incr_gsi);
4803 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4804
4805 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4806 if (stmts)
4807 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4808
4809 prev_stmt_info = NULL;
4810 running_off = offvar;
14ac6aa2 4811 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4812 for (j = 0; j < ncopies; j++)
4813 {
4814 tree vec_inv;
4815
9771b263 4816 vec_alloc (v, nunits);
4817 for (i = 0; i < nunits; i++)
4818 {
4819 tree newref, newoff;
4820 gimple incr;
4821 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4822 running_off, alias_off);
4823
4824 newref = force_gimple_operand_gsi (gsi, newref, true,
4825 NULL_TREE, true,
4826 GSI_SAME_STMT);
4827 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 4828 newoff = copy_ssa_name (running_off, NULL);
4829 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4830 running_off, stride_step);
4831 vect_finish_stmt_generation (stmt, incr, gsi);
4832
4833 running_off = newoff;
4834 }
4835
4836 vec_inv = build_constructor (vectype, v);
4837 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4838 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4839
4840 if (j == 0)
4841 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4842 else
4843 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4844 prev_stmt_info = vinfo_for_stmt (new_stmt);
4845 }
4846 return true;
4847 }
aec7ae7d 4848
0d0293ac 4849 if (grouped_load)
ebfd146a 4850 {
e14c1050 4851 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4852 if (slp
01d8bf07 4853 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4854 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4855 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4856
ebfd146a 4857 /* Check if the chain of loads is already vectorized. */
4858 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4859 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4860 ??? But we can only do so if there is exactly one
4861 as we have no way to get at the rest. Leave the CSE
4862 opportunity alone.
4863 ??? With the group load eventually participating
4864 in multiple different permutations (having multiple
4865 slp nodes which refer to the same group) the CSE
4866 is even wrong code. See PR56270. */
4867 && !slp)
4868 {
4869 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4870 return true;
4871 }
4872 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4873 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4874
4875 /* VEC_NUM is the number of vect stmts to be created for this group. */
4876 if (slp)
4877 {
0d0293ac 4878 grouped_load = false;
ebfd146a 4879 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 4880 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 4881 slp_perm = true;
a64b9c26 4882 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 4883 }
ebfd146a 4884 else
4885 {
4886 vec_num = group_size;
4887 group_gap = 0;
4888 }
4889 }
4890 else
4891 {
4892 first_stmt = stmt;
4893 first_dr = dr;
4894 group_size = vec_num = 1;
a64b9c26 4895 group_gap = 0;
4896 }
4897
720f5239 4898 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4899 gcc_assert (alignment_support_scheme);
4900 /* Targets with load-lane instructions must not require explicit
4901 realignment. */
4902 gcc_assert (!load_lanes_p
4903 || alignment_support_scheme == dr_aligned
4904 || alignment_support_scheme == dr_unaligned_supported);
4905
4906 /* In case the vectorization factor (VF) is bigger than the number
4907 of elements that we can fit in a vectype (nunits), we have to generate
4908 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 4909 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4910 from one copy of the vector stmt to the next, in the field
ff802fa1 4911 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4912 stages to find the correct vector defs to be used when vectorizing
4913 stmts that use the defs of the current stmt. The example below
4914 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4915 need to create 4 vectorized stmts):
4916
4917 before vectorization:
4918 RELATED_STMT VEC_STMT
4919 S1: x = memref - -
4920 S2: z = x + 1 - -
4921
4922 step 1: vectorize stmt S1:
4923 We first create the vector stmt VS1_0, and, as usual, record a
4924 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4925 Next, we create the vector stmt VS1_1, and record a pointer to
4926 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4927 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4928 stmts and pointers:
4929 RELATED_STMT VEC_STMT
4930 VS1_0: vx0 = memref0 VS1_1 -
4931 VS1_1: vx1 = memref1 VS1_2 -
4932 VS1_2: vx2 = memref2 VS1_3 -
4933 VS1_3: vx3 = memref3 - -
4934 S1: x = load - VS1_0
4935 S2: z = x + 1 - -
4936
4937 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4938 information we recorded in RELATED_STMT field is used to vectorize
4939 stmt S2. */
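      /* A minimal sketch of how a later consumer reaches the k-th copy of
	 a vectorized def (assuming the accessors used throughout this
	 file; names are illustrative):

	   vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (scalar_stmt));
	   while (k--)
	     vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));

	 which is the chain vect_get_vec_def_for_stmt_copy relies on.  */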
4940
0d0293ac 4941 /* In case of interleaving (non-unit grouped access):
4942
4943 S1: x2 = &base + 2
4944 S2: x0 = &base
4945 S3: x1 = &base + 1
4946 S4: x3 = &base + 3
4947
b8698a0f 4948 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4949 starting from the access of the first stmt of the chain:
4950
4951 VS1: vx0 = &base
4952 VS2: vx1 = &base + vec_size*1
4953 VS3: vx3 = &base + vec_size*2
4954 VS4: vx4 = &base + vec_size*3
4955
4956 Then permutation statements are generated:
4957
4958 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4959 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4960 ...
4961
4962 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4963 (the order of the data-refs in the output of vect_permute_load_chain
4964 corresponds to the order of scalar stmts in the interleaving chain - see
4965 the documentation of vect_permute_load_chain()).
4966 The generation of permutation stmts and recording them in
0d0293ac 4967 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4968
b8698a0f 4969 In case of both multiple types and interleaving, the vector loads and
4970 permutation stmts above are created for every copy. The result vector
4971 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4972 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4973
4974 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4975 on a target that supports unaligned accesses (dr_unaligned_supported)
4976 we generate the following code:
4977 p = initial_addr;
4978 indx = 0;
4979 loop {
4980 p = p + indx * vectype_size;
4981 vec_dest = *(p);
4982 indx = indx + 1;
4983 }
4984
4985 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4986 does not support unaligned accesses (dr_explicit_realign_optimized) -
4987 then generate the following code, in which the data in each iteration is
4988 obtained by two vector loads, one from the previous iteration, and one
4989 from the current iteration:
4990 p1 = initial_addr;
4991 msq_init = *(floor(p1))
4992 p2 = initial_addr + VS - 1;
4993 realignment_token = call target_builtin;
4994 indx = 0;
4995 loop {
4996 p2 = p2 + indx * vectype_size
4997 lsq = *(floor(p2))
4998 vec_dest = realign_load (msq, lsq, realignment_token)
4999 indx = indx + 1;
5000 msq = lsq;
5001 } */
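   /* Illustration of the realignment scheme (hypothetical layout): with
      16-byte vectors of 4-byte elements and initial_addr == base + 4,
      msq = *(floor(p1)) yields {pad, x0, x1, x2} and the first
      lsq = *(floor(p2)) yields {x3, x4, x5, x6}; realign_load then
      extracts the misaligned window to produce {x0, x1, x2, x3}.  */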
5002
5003 /* If the misalignment remains the same throughout the execution of the
5004 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 5005 preheader. Otherwise, it needs to be created inside the loop.
5006 This can only occur when vectorizing memory accesses in the inner-loop
5007 nested within an outer-loop that is being vectorized. */
5008
d1e4b493 5009 if (nested_in_vect_loop
211bea38 5010 && (TREE_INT_CST_LOW (DR_STEP (dr))
5011 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5012 {
5013 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5014 compute_in_loop = true;
5015 }
5016
5017 if ((alignment_support_scheme == dr_explicit_realign_optimized
5018 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 5019 && !compute_in_loop)
5020 {
5021 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5022 alignment_support_scheme, NULL_TREE,
5023 &at_loop);
5024 if (alignment_support_scheme == dr_explicit_realign_optimized)
5025 {
5026 phi = SSA_NAME_DEF_STMT (msq);
5027 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5028 }
5029 }
5030 else
5031 at_loop = loop;
5032
5033 if (negative)
5034 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5035
5036 if (load_lanes_p)
5037 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5038 else
5039 aggr_type = vectype;
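  /* For illustration (hypothetical types): with V4SI vectors and
     vec_num == 2, the load-lanes aggregate above is the array type
     int[8], letting one IFN_LOAD_LANES call fill both vectors at once;
     otherwise each copy is loaded as a plain vector.  */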
5040
5041 prev_stmt_info = NULL;
5042 for (j = 0; j < ncopies; j++)
b8698a0f 5043 {
272c6793 5044 /* 1. Create the vector or array pointer update chain. */
ebfd146a 5045 if (j == 0)
5046 {
5047 bool simd_lane_access_p
5048 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5049 if (simd_lane_access_p
5050 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5051 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5052 && integer_zerop (DR_OFFSET (first_dr))
5053 && integer_zerop (DR_INIT (first_dr))
5054 && alias_sets_conflict_p (get_alias_set (aggr_type),
5055 get_alias_set (DR_REF (first_dr)))
5056 && (alignment_support_scheme == dr_aligned
5057 || alignment_support_scheme == dr_unaligned_supported))
5058 {
5059 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5060 dataref_offset = build_int_cst (reference_alias_ptr_type
5061 (DR_REF (first_dr)), 0);
8928eff3 5062 inv_p = false;
5063 }
5064 else
5065 dataref_ptr
5066 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5067 offset, &dummy, gsi, &ptr_incr,
5068 simd_lane_access_p, &inv_p);
5069 }
5070 else if (dataref_offset)
5071 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5072 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5073 else
5074 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5075 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5076
0d0293ac 5077 if (grouped_load || slp_perm)
9771b263 5078 dr_chain.create (vec_num);
5ce1ee7f 5079
272c6793 5080 if (load_lanes_p)
ebfd146a 5081 {
5082 tree vec_array;
5083
5084 vec_array = create_vector_array (vectype, vec_num);
5085
5086 /* Emit:
5087 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5088 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5089 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5090 gimple_call_set_lhs (new_stmt, vec_array);
5091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 5092
5093 /* Extract each vector into an SSA_NAME. */
5094 for (i = 0; i < vec_num; i++)
ebfd146a 5095 {
5096 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5097 vec_array, i);
9771b263 5098 dr_chain.quick_push (new_temp);
5099 }
5100
5101 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 5102 vect_record_grouped_load_vectors (stmt, dr_chain);
5103 }
5104 else
5105 {
5106 for (i = 0; i < vec_num; i++)
5107 {
5108 if (i > 0)
5109 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5110 stmt, NULL_TREE);
5111
5112 /* 2. Create the vector-load in the loop. */
5113 switch (alignment_support_scheme)
5114 {
5115 case dr_aligned:
5116 case dr_unaligned_supported:
be1ac4ec 5117 {
5118 unsigned int align, misalign;
5119
5120 data_ref
5121 = build2 (MEM_REF, vectype, dataref_ptr,
5122 dataref_offset
5123 ? dataref_offset
5124 : build_int_cst (reference_alias_ptr_type
5125 (DR_REF (first_dr)), 0));
644ffefd 5126 align = TYPE_ALIGN_UNIT (vectype);
5127 if (alignment_support_scheme == dr_aligned)
5128 {
5129 gcc_assert (aligned_access_p (first_dr));
644ffefd 5130 misalign = 0;
5131 }
5132 else if (DR_MISALIGNMENT (first_dr) == -1)
5133 {
5134 TREE_TYPE (data_ref)
5135 = build_aligned_type (TREE_TYPE (data_ref),
5136 TYPE_ALIGN (elem_type));
5137 align = TYPE_ALIGN_UNIT (elem_type);
5138 misalign = 0;
5139 }
5140 else
5141 {
5142 TREE_TYPE (data_ref)
5143 = build_aligned_type (TREE_TYPE (data_ref),
5144 TYPE_ALIGN (elem_type));
644ffefd 5145 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5146 }
5147 if (dataref_offset == NULL_TREE)
5148 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5149 align, misalign);
272c6793 5150 break;
be1ac4ec 5151 }
272c6793 5152 case dr_explicit_realign:
267d3070 5153 {
5154 tree ptr, bump;
5155 tree vs_minus_1;
5156
5157 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5158
5159 if (compute_in_loop)
5160 msq = vect_setup_realignment (first_stmt, gsi,
5161 &realignment_token,
5162 dr_explicit_realign,
5163 dataref_ptr, NULL);
5164
070ecdfd 5165 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5166 new_stmt = gimple_build_assign_with_ops
070ecdfd 5167 (BIT_AND_EXPR, ptr, dataref_ptr,
5168 build_int_cst
5169 (TREE_TYPE (dataref_ptr),
5170 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5172 data_ref
5173 = build2 (MEM_REF, vectype, ptr,
5174 build_int_cst (reference_alias_ptr_type
5175 (DR_REF (first_dr)), 0));
5176 vec_dest = vect_create_destination_var (scalar_dest,
5177 vectype);
5178 new_stmt = gimple_build_assign (vec_dest, data_ref);
5179 new_temp = make_ssa_name (vec_dest, new_stmt);
5180 gimple_assign_set_lhs (new_stmt, new_temp);
5181 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5182 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5183 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5184 msq = new_temp;
5185
5186 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5187 TYPE_SIZE_UNIT (elem_type));
5188 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5189 new_stmt = gimple_build_assign_with_ops
5190 (BIT_AND_EXPR, NULL_TREE, ptr,
5191 build_int_cst
5192 (TREE_TYPE (ptr),
5193 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5194 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5195 gimple_assign_set_lhs (new_stmt, ptr);
5196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5197 data_ref
5198 = build2 (MEM_REF, vectype, ptr,
5199 build_int_cst (reference_alias_ptr_type
5200 (DR_REF (first_dr)), 0));
5201 break;
267d3070 5202 }
272c6793 5203 case dr_explicit_realign_optimized:
070ecdfd 5204 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5205 new_stmt = gimple_build_assign_with_ops
070ecdfd 5206 (BIT_AND_EXPR, new_temp, dataref_ptr,
5207 build_int_cst
5208 (TREE_TYPE (dataref_ptr),
5209 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5210 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5211 data_ref
5212 = build2 (MEM_REF, vectype, new_temp,
5213 build_int_cst (reference_alias_ptr_type
5214 (DR_REF (first_dr)), 0));
5215 break;
5216 default:
5217 gcc_unreachable ();
5218 }
ebfd146a 5219	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5220	  new_stmt = gimple_build_assign (vec_dest, data_ref);
5221 new_temp = make_ssa_name (vec_dest, new_stmt);
5222 gimple_assign_set_lhs (new_stmt, new_temp);
5223 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5224
5225 /* 3. Handle explicit realignment if necessary/supported.
5226 Create in loop:
5227 vec_dest = realign_load (msq, lsq, realignment_token) */
5228 if (alignment_support_scheme == dr_explicit_realign_optimized
5229 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5230 {
5231 lsq = gimple_assign_lhs (new_stmt);
5232 if (!realignment_token)
5233 realignment_token = dataref_ptr;
5234 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5235 new_stmt
5236 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5237 vec_dest, msq, lsq,
5238 realignment_token);
5239 new_temp = make_ssa_name (vec_dest, new_stmt);
5240 gimple_assign_set_lhs (new_stmt, new_temp);
5241 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5242
5243 if (alignment_support_scheme == dr_explicit_realign_optimized)
5244 {
5245 gcc_assert (phi);
5246 if (i == vec_num - 1 && j == ncopies - 1)
5247 add_phi_arg (phi, lsq,
5248 loop_latch_edge (containing_loop),
9e227d60 5249 UNKNOWN_LOCATION);
5250 msq = lsq;
5251 }
ebfd146a 5252 }
ebfd146a 5253
5254 /* 4. Handle invariant-load. */
5255 if (inv_p && !bb_vinfo)
5256 {
5257 gimple_stmt_iterator gsi2 = *gsi;
5258 gcc_assert (!grouped_load);
5259 gsi_next (&gsi2);
5260 new_temp = vect_init_vector (stmt, scalar_dest,
5261 vectype, &gsi2);
5262 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5263 }
5264
5265 if (negative)
5266 {
5267 tree perm_mask = perm_mask_for_reverse (vectype);
5268 new_temp = permute_vec_elements (new_temp, new_temp,
5269 perm_mask, stmt, gsi);
5270 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5271 }
267d3070 5272
272c6793 5273 /* Collect vector loads and later create their permutation in
5274 vect_transform_grouped_load (). */
5275 if (grouped_load || slp_perm)
9771b263 5276 dr_chain.quick_push (new_temp);
267d3070 5277
5278 /* Store vector loads in the corresponding SLP_NODE. */
5279 if (slp && !slp_perm)
9771b263 5280 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5281 }
5282 /* Bump the vector pointer to account for a gap. */
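	 /* For illustration (hypothetical values): with GROUP_GAP == 1 and
	    4-byte elements, the bump below advances dataref_ptr by 4 bytes
	    so the next vector chunk skips the unused element between
	    consecutive groups.  */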
5283 if (slp && group_gap != 0)
5284 {
5285 tree bump = size_binop (MULT_EXPR,
5286 TYPE_SIZE_UNIT (elem_type),
5287 size_int (group_gap));
5288 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5289 stmt, bump);
5290 }
5291 }
5292
5293 if (slp && !slp_perm)
5294 continue;
5295
5296 if (slp_perm)
5297 {
01d8bf07 5298 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5299 slp_node_instance, false))
5300 {
9771b263 5301 dr_chain.release ();
5302 return false;
5303 }
5304 }
5305 else
5306 {
0d0293ac 5307 if (grouped_load)
ebfd146a 5308 {
272c6793 5309 if (!load_lanes_p)
0d0293ac 5310 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5311	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5312	    }
5313 else
5314 {
5315 if (j == 0)
5316 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5317 else
5318 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5319 prev_stmt_info = vinfo_for_stmt (new_stmt);
5320 }
5321 }
9771b263 5322 dr_chain.release ();
5323 }
5324
5325 return true;
5326}
5327
5328/* Function vect_is_simple_cond.
b8698a0f 5329
5330 Input:
5331 LOOP - the loop that is being vectorized.
5332 COND - Condition that is checked for simple use.
5333
5334 Output:
5335 *COMP_VECTYPE - the vector type for the comparison.
5336
5337 Returns whether a COND can be vectorized. Checks whether
5338 condition operands are supportable using vect_is_simple_use. */
5339
87aab9b2 5340static bool
5341vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5342 bb_vec_info bb_vinfo, tree *comp_vectype)
5343{
5344 tree lhs, rhs;
5345 tree def;
5346 enum vect_def_type dt;
e9e1d143 5347 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5348
5349 if (!COMPARISON_CLASS_P (cond))
5350 return false;
5351
5352 lhs = TREE_OPERAND (cond, 0);
5353 rhs = TREE_OPERAND (cond, 1);
5354
5355 if (TREE_CODE (lhs) == SSA_NAME)
5356 {
5357 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5358 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5359 &lhs_def_stmt, &def, &dt, &vectype1))
5360 return false;
5361 }
5362 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5363 && TREE_CODE (lhs) != FIXED_CST)
5364 return false;
5365
5366 if (TREE_CODE (rhs) == SSA_NAME)
5367 {
5368 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5369 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5370 &rhs_def_stmt, &def, &dt, &vectype2))
5371 return false;
5372 }
f7e531cf 5373 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5374 && TREE_CODE (rhs) != FIXED_CST)
5375 return false;
5376
e9e1d143 5377 *comp_vectype = vectype1 ? vectype1 : vectype2;
5378 return true;
5379}
5380
5381/* vectorizable_condition.
5382
5383 Check if STMT is conditional modify expression that can be vectorized.
5384 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5385 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5386 at GSI.
5387
5388 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5389 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5390 else clause if it is 2).
5391
5392 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
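/* For illustration (hypothetical SSA names): a scalar statement
     x_1 = a_2 < b_3 ? c_4 : d_5;
   is replaced by a vector comparison feeding a VEC_COND_EXPR:
     vcmp_6 = va_7 < vb_8;
     vx_9 = VEC_COND_EXPR <vcmp_6, vc_10, vd_11>;
   which matches the build2/build3 calls in the transform below.  */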
5393
4bbe8262 5394bool
ebfd146a 5395vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5396 gimple *vec_stmt, tree reduc_def, int reduc_index,
5397 slp_tree slp_node)
5398{
5399 tree scalar_dest = NULL_TREE;
5400 tree vec_dest = NULL_TREE;
5401 tree cond_expr, then_clause, else_clause;
5402 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5403 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5404 tree comp_vectype = NULL_TREE;
5405 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5406 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5407 tree vec_compare, vec_cond_expr;
5408 tree new_temp;
5409 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5410 tree def;
a855b1b1 5411 enum vect_def_type dt, dts[4];
ebfd146a 5412 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5413 int ncopies;
ebfd146a 5414 enum tree_code code;
a855b1b1 5415 stmt_vec_info prev_stmt_info = NULL;
5416 int i, j;
5417 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5418 vec<tree> vec_oprnds0 = vNULL;
5419 vec<tree> vec_oprnds1 = vNULL;
5420 vec<tree> vec_oprnds2 = vNULL;
5421 vec<tree> vec_oprnds3 = vNULL;
74946978 5422 tree vec_cmp_type;
b8698a0f 5423
5424 if (slp_node || PURE_SLP_STMT (stmt_info))
5425 ncopies = 1;
5426 else
5427 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5428
ebfd146a 5429 gcc_assert (ncopies >= 1);
a855b1b1 5430 if (reduc_index && ncopies > 1)
5431 return false; /* FORNOW */
5432
5433 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5434 return false;
5435
5436 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5437 return false;
5438
5439 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5440 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5441 && reduc_def))
5442 return false;
5443
ebfd146a 5444 /* FORNOW: not yet supported. */
b8698a0f 5445 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5446 {
73fbfcad 5447 if (dump_enabled_p ())
78c60e3d 5448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5449 "value used after loop.\n");
5450 return false;
5451 }
5452
5453 /* Is vectorizable conditional operation? */
5454 if (!is_gimple_assign (stmt))
5455 return false;
5456
5457 code = gimple_assign_rhs_code (stmt);
5458
5459 if (code != COND_EXPR)
5460 return false;
5461
5462 cond_expr = gimple_assign_rhs1 (stmt);
5463 then_clause = gimple_assign_rhs2 (stmt);
5464 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5465
5466 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5467 &comp_vectype)
e9e1d143 5468 || !comp_vectype)
5469 return false;
5470
5471 if (TREE_CODE (then_clause) == SSA_NAME)
5472 {
5473 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5474 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5475 &then_def_stmt, &def, &dt))
5476 return false;
5477 }
b8698a0f 5478 else if (TREE_CODE (then_clause) != INTEGER_CST
5479 && TREE_CODE (then_clause) != REAL_CST
5480 && TREE_CODE (then_clause) != FIXED_CST)
5481 return false;
5482
5483 if (TREE_CODE (else_clause) == SSA_NAME)
5484 {
5485 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5486 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5487 &else_def_stmt, &def, &dt))
5488 return false;
5489 }
b8698a0f 5490 else if (TREE_CODE (else_clause) != INTEGER_CST
5491 && TREE_CODE (else_clause) != REAL_CST
5492 && TREE_CODE (else_clause) != FIXED_CST)
5493 return false;
5494
5495 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5496 /* The result of a vector comparison should be a signed integer type. */
5497 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5498 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5499 if (vec_cmp_type == NULL_TREE)
5500 return false;
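  /* For illustration (hypothetical types): for V4SF operands, prec is 32,
     cmp_type is a 32-bit signed integer type, and vec_cmp_type is the
     matching V4SI type in which the comparison below is built.  */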
784fb9b3 5501
b8698a0f 5502 if (!vec_stmt)
5503 {
5504 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5505 return expand_vec_cond_expr_p (vectype, comp_vectype);
5506 }
5507
5508 /* Transform. */
5509
5510 if (!slp_node)
5511 {
5512 vec_oprnds0.create (1);
5513 vec_oprnds1.create (1);
5514 vec_oprnds2.create (1);
5515 vec_oprnds3.create (1);
f7e531cf 5516 }
5517
5518 /* Handle def. */
5519 scalar_dest = gimple_assign_lhs (stmt);
5520 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5521
5522 /* Handle cond expr. */
5523 for (j = 0; j < ncopies; j++)
5524 {
f7e531cf 5525 gimple new_stmt = NULL;
5526 if (j == 0)
5527 {
5528 if (slp_node)
5529 {
5530 stack_vec<tree, 4> ops;
5531 stack_vec<vec<tree>, 4> vec_defs;
9771b263 5532
5533 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5534 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5535 ops.safe_push (then_clause);
5536 ops.safe_push (else_clause);
f7e531cf 5537 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5538 vec_oprnds3 = vec_defs.pop ();
5539 vec_oprnds2 = vec_defs.pop ();
5540 vec_oprnds1 = vec_defs.pop ();
5541 vec_oprnds0 = vec_defs.pop ();
f7e531cf 5542
5543 ops.release ();
5544 vec_defs.release ();
5545 }
5546 else
5547 {
5548 gimple gtemp;
5549 vec_cond_lhs =
5550 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5551 stmt, NULL);
5552 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5553 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5554
5555 vec_cond_rhs =
5556 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5557 stmt, NULL);
5558 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5559 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5560 if (reduc_index == 1)
5561 vec_then_clause = reduc_def;
5562 else
5563 {
5564 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5565 stmt, NULL);
24ee1384 5566 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5567 NULL, &gtemp, &def, &dts[2]);
5568 }
5569 if (reduc_index == 2)
5570 vec_else_clause = reduc_def;
5571 else
5572 {
5573 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5574 stmt, NULL);
24ee1384 5575 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5576 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5577 }
5578 }
5579 }
5580 else
5581 {
f7e531cf 5582 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5583 vec_oprnds0.pop ());
f7e531cf 5584 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5585 vec_oprnds1.pop ());
a855b1b1 5586 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5587 vec_oprnds2.pop ());
a855b1b1 5588 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5589 vec_oprnds3.pop ());
5590 }
5591
5592 if (!slp_node)
5593 {
5594 vec_oprnds0.quick_push (vec_cond_lhs);
5595 vec_oprnds1.quick_push (vec_cond_rhs);
5596 vec_oprnds2.quick_push (vec_then_clause);
5597 vec_oprnds3.quick_push (vec_else_clause);
5598 }
5599
9dc3f7de 5600 /* Arguments are ready. Create the new vector stmt. */
9771b263 5601 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5602 {
5603 vec_cond_rhs = vec_oprnds1[i];
5604 vec_then_clause = vec_oprnds2[i];
5605 vec_else_clause = vec_oprnds3[i];
a855b1b1 5606
5607 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5608 vec_cond_lhs, vec_cond_rhs);
5609 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5610 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5611
5612 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5613 new_temp = make_ssa_name (vec_dest, new_stmt);
5614 gimple_assign_set_lhs (new_stmt, new_temp);
5615 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5616 if (slp_node)
9771b263 5617 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5618 }
5619
5620 if (slp_node)
5621 continue;
5622
5623 if (j == 0)
5624 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5625 else
5626 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5627
5628 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5629 }
b8698a0f 5630
5631 vec_oprnds0.release ();
5632 vec_oprnds1.release ();
5633 vec_oprnds2.release ();
5634 vec_oprnds3.release ();
f7e531cf 5635
5636 return true;
5637}
5638
5639
8644a673 5640/* Make sure the statement is vectorizable. */
5641
5642bool
a70d6342 5643vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5644{
8644a673 5645 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5646 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5647 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5648 bool ok;
a70d6342 5649 tree scalar_type, vectype;
5650 gimple pattern_stmt;
5651 gimple_seq pattern_def_seq;
ebfd146a 5652
73fbfcad 5653 if (dump_enabled_p ())
ebfd146a 5654 {
5655 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5657 dump_printf (MSG_NOTE, "\n");
8644a673 5658 }
ebfd146a 5659
1825a1f3 5660 if (gimple_has_volatile_ops (stmt))
b8698a0f 5661 {
73fbfcad 5662 if (dump_enabled_p ())
78c60e3d 5663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5664 "not vectorized: stmt has volatile operands\n");
5665
5666 return false;
5667 }
5668
5669 /* Skip stmts that do not need to be vectorized. In loops this is expected
5670 to include:
5671 - the COND_EXPR which is the loop exit condition
5672 - any LABEL_EXPRs in the loop
b8698a0f 5673 - computations that are used only for array indexing or loop control.
8644a673 5674 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5675 instance, therefore, all the statements are relevant.
ebfd146a 5676
d092494c 5677 Pattern statement needs to be analyzed instead of the original statement
83197f37 5678 if the original statement is not relevant. Otherwise, we analyze both
5679 statements. In basic blocks we are called from some SLP instance
5680 traversal, don't analyze pattern stmts instead, the pattern stmts
5681 already will be part of SLP instance. */
5682
5683 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5684 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5685 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5686 {
9d5e7640 5687 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5688 && pattern_stmt
5689 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5690 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5691 {
83197f37 5692 /* Analyze PATTERN_STMT instead of the original stmt. */
5693 stmt = pattern_stmt;
5694 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5695 if (dump_enabled_p ())
9d5e7640 5696 {
5697 dump_printf_loc (MSG_NOTE, vect_location,
5698 "==> examining pattern statement: ");
5699 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5700 dump_printf (MSG_NOTE, "\n");
5701 }
5702 }
5703 else
5704 {
73fbfcad 5705 if (dump_enabled_p ())
e645e942 5706 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 5707
5708 return true;
5709 }
8644a673 5710 }
83197f37 5711 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5712 && node == NULL
5713 && pattern_stmt
5714 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5715 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5716 {
5717 /* Analyze PATTERN_STMT too. */
73fbfcad 5718 if (dump_enabled_p ())
83197f37 5719 {
5720 dump_printf_loc (MSG_NOTE, vect_location,
5721 "==> examining pattern statement: ");
5722 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5723 dump_printf (MSG_NOTE, "\n");
5724 }
5725
5726 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5727 return false;
5728 }
ebfd146a 5729
1107f3ae 5730 if (is_pattern_stmt_p (stmt_info)
079c527f 5731 && node == NULL
363477c0 5732 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5733 {
363477c0 5734      gimple_stmt_iterator si;
1107f3ae 5735
5736 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5737 {
5738 gimple pattern_def_stmt = gsi_stmt (si);
5739 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5740 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5741 {
5742 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5743 if (dump_enabled_p ())
363477c0 5744 {
78c60e3d
SS
5745 dump_printf_loc (MSG_NOTE, vect_location,
5746 "==> examining pattern def statement: ");
5747 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
e645e942 5748 dump_printf (MSG_NOTE, "\n");
363477c0 5749 }
1107f3ae 5750
5751 if (!vect_analyze_stmt (pattern_def_stmt,
5752 need_to_vectorize, node))
5753 return false;
5754 }
5755 }
5756 }
1107f3ae 5757
5758 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5759 {
5760 case vect_internal_def:
5761 break;
ebfd146a 5762
8644a673 5763 case vect_reduction_def:
7c5222ff 5764 case vect_nested_cycle:
a70d6342 5765 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5766 || relevance == vect_used_in_outer_by_reduction
a70d6342 5767 || relevance == vect_unused_in_scope));
5768 break;
5769
5770 case vect_induction_def:
5771 case vect_constant_def:
5772 case vect_external_def:
5773 case vect_unknown_def_type:
5774 default:
5775 gcc_unreachable ();
5776 }
ebfd146a 5777
5778 if (bb_vinfo)
5779 {
5780 gcc_assert (PURE_SLP_STMT (stmt_info));
5781
b690cc0f 5782 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5783 if (dump_enabled_p ())
a70d6342 5784 {
5785 dump_printf_loc (MSG_NOTE, vect_location,
5786 "get vectype for scalar type: ");
5787 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 5788 dump_printf (MSG_NOTE, "\n");
5789 }
5790
5791 vectype = get_vectype_for_scalar_type (scalar_type);
5792 if (!vectype)
5793 {
73fbfcad 5794 if (dump_enabled_p ())
a70d6342 5795 {
78c60e3d
SS
5796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5797 "not SLPed: unsupported data-type ");
5798 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5799 scalar_type);
e645e942 5800 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5801 }
5802 return false;
5803 }
5804
73fbfcad 5805 if (dump_enabled_p ())
a70d6342 5806 {
5807 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5808 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 5809 dump_printf (MSG_NOTE, "\n");
5810 }
5811
5812 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5813 }
5814
8644a673 5815 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5816 {
5817 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5818 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5819 *need_to_vectorize = true;
5820 }
5821
8644a673 5822 ok = true;
b8698a0f 5823 if (!bb_vinfo
5824 && (STMT_VINFO_RELEVANT_P (stmt_info)
5825 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5826 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5827 || vectorizable_shift (stmt, NULL, NULL, NULL)
5828 || vectorizable_operation (stmt, NULL, NULL, NULL)
5829 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5830 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5831 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5832 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5833 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5834 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5835 else
5836 {
5837 if (bb_vinfo)
5838 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5839 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5840 || vectorizable_operation (stmt, NULL, NULL, node)
5841 || vectorizable_assignment (stmt, NULL, NULL, node)
5842 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5843 || vectorizable_call (stmt, NULL, NULL, node)
5844 || vectorizable_store (stmt, NULL, NULL, node)
5845 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5846 }
5847
5848 if (!ok)
ebfd146a 5849 {
73fbfcad 5850 if (dump_enabled_p ())
8644a673 5851 {
78c60e3d
SS
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5853 "not vectorized: relevant stmt not ");
5854 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5856 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5857 }
b8698a0f 5858
ebfd146a
IR
5859 return false;
5860 }
5861
a70d6342
IR
5862 if (bb_vinfo)
5863 return true;
5864
8644a673
IR
5865 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5866 need extra handling, except for vectorizable reductions. */
5867 if (STMT_VINFO_LIVE_P (stmt_info)
5868 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5869 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5870
8644a673 5871 if (!ok)
ebfd146a 5872 {
73fbfcad 5873 if (dump_enabled_p ())
8644a673 5874 {
78c60e3d
SS
5875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5876 "not vectorized: live stmt not ");
5877 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5878 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5879 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5880 }
b8698a0f 5881
8644a673 5882 return false;
5883 }
5884
5885 return true;
5886}
5887
5888
5889/* Function vect_transform_stmt.
5890
5891 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5892
5893bool
5894vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5895 bool *grouped_store, slp_tree slp_node,
5896 slp_instance slp_node_instance)
5897{
5898 bool is_store = false;
5899 gimple vec_stmt = NULL;
5900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5901 bool done;
5902
5903 switch (STMT_VINFO_TYPE (stmt_info))
5904 {
5905 case type_demotion_vec_info_type:
ebfd146a 5906 case type_promotion_vec_info_type:
ebfd146a
IR
5907 case type_conversion_vec_info_type:
5908 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5909 gcc_assert (done);
5910 break;
5911
5912 case induc_vec_info_type:
5913 gcc_assert (!slp_node);
5914 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5915 gcc_assert (done);
5916 break;
5917
5918 case shift_vec_info_type:
5919 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5920 gcc_assert (done);
5921 break;
5922
5923 case op_vec_info_type:
5924 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5925 gcc_assert (done);
5926 break;
5927
5928 case assignment_vec_info_type:
5929 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5930 gcc_assert (done);
5931 break;
5932
5933 case load_vec_info_type:
b8698a0f 5934 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5935 slp_node_instance);
5936 gcc_assert (done);
5937 break;
5938
5939 case store_vec_info_type:
5940 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5941 gcc_assert (done);
0d0293ac 5942 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5943 {
5944 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5945 last store in the chain is reached. Store stmts before the last
5946 one are skipped, and their vec_stmt_info shouldn't be freed
5947 meanwhile. */
0d0293ac 5948 *grouped_store = true;
5949 if (STMT_VINFO_VEC_STMT (stmt_info))
5950 is_store = true;
5951 }
5952 else
5953 is_store = true;
5954 break;
5955
5956 case condition_vec_info_type:
f7e531cf 5957 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5958 gcc_assert (done);
5959 break;
5960
5961 case call_vec_info_type:
190c2236 5962 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5963 stmt = gsi_stmt (*gsi);
5964 break;
5965
5966 case reduc_vec_info_type:
b5aeb3bb 5967 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5968 gcc_assert (done);
5969 break;
5970
5971 default:
5972 if (!STMT_VINFO_LIVE_P (stmt_info))
5973 {
73fbfcad 5974 if (dump_enabled_p ())
78c60e3d 5975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5976 "stmt not supported.\n");
5977 gcc_unreachable ();
5978 }
5979 }
5980
5981 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5982 is being vectorized, but outside the immediately enclosing loop. */
5983 if (vec_stmt
5984 && STMT_VINFO_LOOP_VINFO (stmt_info)
5985 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5986 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5987 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5988 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5989 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5990 vect_used_in_outer_by_reduction))
ebfd146a 5991 {
5992 struct loop *innerloop = LOOP_VINFO_LOOP (
5993 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5994 imm_use_iterator imm_iter;
5995 use_operand_p use_p;
5996 tree scalar_dest;
5997 gimple exit_phi;
5998
73fbfcad 5999 if (dump_enabled_p ())
78c60e3d 6000 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6001 "Record the vdef for outer-loop vectorization.\n");
6002
6003 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6004 (to be used when vectorizing outer-loop stmts that use the DEF of
6005 STMT). */
6006 if (gimple_code (stmt) == GIMPLE_PHI)
6007 scalar_dest = PHI_RESULT (stmt);
6008 else
6009 scalar_dest = gimple_assign_lhs (stmt);
6010
6011 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6012 {
6013 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6014 {
6015 exit_phi = USE_STMT (use_p);
6016 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6017 }
6018 }
6019 }
6020
6021 /* Handle stmts whose DEF is used outside the loop-nest that is
6022 being vectorized. */
6023 if (STMT_VINFO_LIVE_P (stmt_info)
6024 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6025 {
6026 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6027 gcc_assert (done);
6028 }
6029
6030 if (vec_stmt)
83197f37 6031 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 6032
b8698a0f 6033 return is_store;
6034}
6035
6036
b8698a0f 6037/* Remove a group of stores (for SLP or interleaving), free their
6038 stmt_vec_info. */
6039
6040void
6041vect_remove_stores (gimple first_stmt)
6042{
6043 gimple next = first_stmt;
6044 gimple tmp;
6045 gimple_stmt_iterator next_si;
6046
6047 while (next)
6048 {
6049 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6050
6051 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6052 if (is_pattern_stmt_p (stmt_info))
6053 next = STMT_VINFO_RELATED_STMT (stmt_info);
6054 /* Free the attached stmt_vec_info and remove the stmt. */
6055 next_si = gsi_for_stmt (next);
3d3f2249 6056 unlink_stmt_vdef (next);
ebfd146a 6057 gsi_remove (&next_si, true);
3d3f2249 6058 release_defs (next);
6059 free_stmt_vec_info (next);
6060 next = tmp;
6061 }
6062}
6063
6064
6065/* Function new_stmt_vec_info.
6066
6067 Create and initialize a new stmt_vec_info struct for STMT. */
6068
6069stmt_vec_info
b8698a0f 6070new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6071 bb_vec_info bb_vinfo)
6072{
6073 stmt_vec_info res;
6074 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6075
6076 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6077 STMT_VINFO_STMT (res) = stmt;
6078 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 6079 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 6080 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6081 STMT_VINFO_LIVE_P (res) = false;
6082 STMT_VINFO_VECTYPE (res) = NULL;
6083 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 6084 STMT_VINFO_VECTORIZABLE (res) = true;
6085 STMT_VINFO_IN_PATTERN_P (res) = false;
6086 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 6087 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6088 STMT_VINFO_DATA_REF (res) = NULL;
6089
6090 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6091 STMT_VINFO_DR_OFFSET (res) = NULL;
6092 STMT_VINFO_DR_INIT (res) = NULL;
6093 STMT_VINFO_DR_STEP (res) = NULL;
6094 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6095
6096 if (gimple_code (stmt) == GIMPLE_PHI
6097 && is_loop_header_bb_p (gimple_bb (stmt)))
6098 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6099 else
6100 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6101
9771b263 6102 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 6103 STMT_SLP_TYPE (res) = loop_vect;
6104 GROUP_FIRST_ELEMENT (res) = NULL;
6105 GROUP_NEXT_ELEMENT (res) = NULL;
6106 GROUP_SIZE (res) = 0;
6107 GROUP_STORE_COUNT (res) = 0;
6108 GROUP_GAP (res) = 0;
6109 GROUP_SAME_DR_STMT (res) = NULL;
6110
6111 return res;
6112}
6113
6114
6115/* Create the vector that maps stmts to their stmt_vec_infos. */
6116
6117void
6118init_stmt_vec_info_vec (void)
6119{
6120 gcc_assert (!stmt_vec_info_vec.exists ());
6121 stmt_vec_info_vec.create (50);
6122}
6123
6124
6125/* Free the vector that maps stmts to their stmt_vec_infos. */
6126
6127void
6128free_stmt_vec_info_vec (void)
6129{
6130 unsigned int i;
6131 vec_void_p info;
6132 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6133 if (info != NULL)
6134 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6135 gcc_assert (stmt_vec_info_vec.exists ());
6136 stmt_vec_info_vec.release ();
6137}
6138
6139
6140/* Free stmt vectorization related info. */
6141
6142void
6143free_stmt_vec_info (gimple stmt)
6144{
6145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6146
6147 if (!stmt_info)
6148 return;
6149
6150 /* Check if this statement has a related "pattern stmt"
6151 (introduced by the vectorizer during the pattern recognition
6152 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6153 too. */
6154 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6155 {
6156 stmt_vec_info patt_info
6157 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6158 if (patt_info)
6159 {
6160 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6161 if (seq)
6162 {
6163 gimple_stmt_iterator si;
6164 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6165 free_stmt_vec_info (gsi_stmt (si));
6166 }
6167 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6168 }
6169 }
6170
9771b263 6171 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6172 set_vinfo_for_stmt (stmt, NULL);
6173 free (stmt_info);
6174}
6175
6176
bb67d9c7 6177/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6178
bb67d9c7 6179 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6180 by the target. */
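/* For illustration (target-dependent, hypothetical numbers): for
   SCALAR_TYPE int (SImode, 4 bytes) and SIZE 16, mode_for_vector yields
   V4SImode, nunits is 16/4 == 4, and a 4-element integer vector type is
   built; with SIZE == 0 the target's preferred SIMD mode is used
   instead.  */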
6181
6182static tree
6183get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6184{
6185 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6186 enum machine_mode simd_mode;
2f816591 6187 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6188 int nunits;
6189 tree vectype;
6190
cc4b5170 6191 if (nbytes == 0)
6192 return NULL_TREE;
6193
6194 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6195 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6196 return NULL_TREE;
6197
6198 /* For vector types of elements whose mode precision doesn't
6199 match their type's precision we use an element type of mode
6200 precision. The vectorization routines will have to make sure
6201 they support the proper result truncation/extension.
6202 We also make sure to build vector types with INTEGER_TYPE
6203 component type only. */
6d7971b8 6204 if (INTEGRAL_TYPE_P (scalar_type)
6205 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6206 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6207 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6208 TYPE_UNSIGNED (scalar_type));
6d7971b8 6209
6210 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6211 When the component mode passes the above test simply use a type
6212 corresponding to that mode. The theory is that any use that
6213 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 6214 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 6215 && !INTEGRAL_TYPE_P (scalar_type))
6216 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6217
6218 /* We can't build a vector type of elements with alignment bigger than
6219 their size. */
dfc2e2ac 6220 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6221 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6222 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 6223
6224 /* If we fell back to using the mode, fail if there was
6225 no scalar type for it. */
6226 if (scalar_type == NULL_TREE)
6227 return NULL_TREE;
6228
6229 /* If no size was supplied use the mode the target prefers. Otherwise
6230 lookup a vector mode of the specified size. */
6231 if (size == 0)
6232 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6233 else
6234 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6235 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6236 if (nunits <= 1)
6237 return NULL_TREE;
6238
6239 vectype = build_vector_type (scalar_type, nunits);
6240
6241 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6242 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 6243 return NULL_TREE;
6244
6245 return vectype;
6246}
6247
6248unsigned int current_vector_size;
6249
6250/* Function get_vectype_for_scalar_type.
6251
6252 Returns the vector type corresponding to SCALAR_TYPE as supported
6253 by the target. */
6254
6255tree
6256get_vectype_for_scalar_type (tree scalar_type)
6257{
6258 tree vectype;
6259 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6260 current_vector_size);
6261 if (vectype
6262 && current_vector_size == 0)
6263 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6264 return vectype;
6265}
6266
6267/* Function get_same_sized_vectype
6268
6269 Returns a vector type corresponding to SCALAR_TYPE of size
6270 VECTOR_TYPE if supported by the target. */
6271
6272tree
bb67d9c7 6273get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6274{
6275 return get_vectype_for_scalar_type_and_size
6276 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6277}
6278
6279/* Function vect_is_simple_use.
6280
6281 Input:
6282 LOOP_VINFO - the vect info of the loop that is being vectorized.
6283 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6284 OPERAND - operand of STMT in the loop or bb.
6285 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6286
6287 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6288 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6289 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6290 operands are those that are defined by a previous iteration of the loop (as
6291 is the case in reduction/induction computations).
6292 For basic blocks, supportable operands are constants and bb invariants.
6293 For now, operands defined outside the basic block are not supported. */
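/* For illustration (hypothetical operands): the constant 5 is classified
   as vect_constant_def; an SSA name defined before the loop as
   vect_external_def; an SSA name defined by a statement inside the loop
   gets the def type recorded in that statement's stmt_vec_info, e.g.
   vect_internal_def.  */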
6294
6295bool
24ee1384 6296vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6297 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6298 tree *def, enum vect_def_type *dt)
b8698a0f 6299{
6300 basic_block bb;
6301 stmt_vec_info stmt_vinfo;
a70d6342 6302 struct loop *loop = NULL;
b8698a0f 6303
6304 if (loop_vinfo)
6305 loop = LOOP_VINFO_LOOP (loop_vinfo);
6306
6307 *def_stmt = NULL;
6308 *def = NULL_TREE;
b8698a0f 6309
73fbfcad 6310 if (dump_enabled_p ())
ebfd146a 6311 {
6312 dump_printf_loc (MSG_NOTE, vect_location,
6313 "vect_is_simple_use: operand ");
6314 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 6315 dump_printf (MSG_NOTE, "\n");
ebfd146a 6316 }
b8698a0f 6317
b758f602 6318 if (CONSTANT_CLASS_P (operand))
6319 {
6320 *dt = vect_constant_def;
6321 return true;
6322 }
b8698a0f 6323
6324 if (is_gimple_min_invariant (operand))
6325 {
6326 *def = operand;
8644a673 6327 *dt = vect_external_def;
6328 return true;
6329 }
6330
6331 if (TREE_CODE (operand) == PAREN_EXPR)
6332 {
73fbfcad 6333 if (dump_enabled_p ())
e645e942 6334 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
6335 operand = TREE_OPERAND (operand, 0);
6336 }
b8698a0f 6337
6338 if (TREE_CODE (operand) != SSA_NAME)
6339 {
73fbfcad 6340 if (dump_enabled_p ())
78c60e3d 6341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6342 "not ssa-name.\n");
6343 return false;
6344 }
b8698a0f 6345
6346 *def_stmt = SSA_NAME_DEF_STMT (operand);
6347 if (*def_stmt == NULL)
6348 {
73fbfcad 6349 if (dump_enabled_p ())
78c60e3d 6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6351 "no def_stmt.\n");
6352 return false;
6353 }
6354
73fbfcad 6355 if (dump_enabled_p ())
ebfd146a 6356 {
78c60e3d
SS
6357 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6358 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
e645e942 6359 dump_printf (MSG_NOTE, "\n");
6360 }
6361
8644a673 6362 /* Empty stmt is expected only in case of a function argument.
6363 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6364 if (gimple_nop_p (*def_stmt))
6365 {
6366 *def = operand;
8644a673 6367 *dt = vect_external_def;
6368 return true;
6369 }
6370
6371 bb = gimple_bb (*def_stmt);
a70d6342
IR
6372
6373 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6374 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6375 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6376 *dt = vect_external_def;
6377 else
6378 {
6379 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6380 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6381 }
6382
6383 if (*dt == vect_unknown_def_type
6384 || (stmt
6385 && *dt == vect_double_reduction_def
6386 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6387 {
73fbfcad 6388 if (dump_enabled_p ())
78c60e3d 6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6390 "Unsupported pattern.\n");
6391 return false;
6392 }
6393
73fbfcad 6394 if (dump_enabled_p ())
e645e942 6395 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
6396
6397 switch (gimple_code (*def_stmt))
6398 {
6399 case GIMPLE_PHI:
6400 *def = gimple_phi_result (*def_stmt);
6401 break;
6402
6403 case GIMPLE_ASSIGN:
6404 *def = gimple_assign_lhs (*def_stmt);
6405 break;
6406
6407 case GIMPLE_CALL:
6408 *def = gimple_call_lhs (*def_stmt);
6409 if (*def != NULL)
6410 break;
6411 /* FALLTHRU */
6412 default:
73fbfcad 6413 if (dump_enabled_p ())
78c60e3d 6414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6415 "unsupported defining stmt:\n");
ebfd146a
IR
6416 return false;
6417 }
6418
6419 return true;
6420}
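
/* Usage sketch (illustrative only, not part of the GCC sources): a
   typical vectorizable_* routine validates each operand like so.  OP,
   STMT, LOOP_VINFO and BB_VINFO are assumed to be in scope, with
   exactly one of the two vinfo pointers non-NULL:

       gimple def_stmt;
       tree def;
       enum vect_def_type dt;

       if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt))
         return false;

   On success DT tells the caller how the operand is defined:
   constants and external defs are broadcast outside the loop, while
   internal defs reuse the result of the already-vectorized defining
   statement.  */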

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
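
/* Usage sketch (illustrative only, not part of the GCC sources):
   callers that also need the operand's vector type use the _1
   variant and fall back to deriving a type themselves when NULL_TREE
   comes back for a constant or external def.  OP, STMT and the vinfo
   pointers are assumed to be in scope:

       gimple def_stmt;
       tree def, vectype_in;
       enum vect_def_type dt;

       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                  &def_stmt, &def, &dt, &vectype_in))
         return false;
       if (vectype_in == NULL_TREE)
         vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));

   get_vectype_for_scalar_type is the usual fallback for the
   NULL_TREE case.  */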


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these
   operations are supported by the target platform either directly
   (via vector tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int; in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one
         vector).  The generated vector results would normally be expected
         to be generated in the same order as in the original scalar
         computation, i.e. if 8 results are generated in each vector
         iteration, they are to be organized as follows:
         vect1: [res1,res2,res3,res4],
         vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like AltiVec
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
         vect1: [res1,res3,res5,res7],
         vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow the order of
         the computation in the inner-loop to change during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
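
/* Worked example (illustrative only; the vector type names are
   hypothetical): for a char->int conversion with a V16QI input and a
   V4SI output, a query along the lines of

       enum tree_code code1, code2;
       int steps;
       vec<tree> interm = vNULL;

       if (supportable_widening_operation (NOP_EXPR, stmt, v4si_type,
                                           v16qi_type, &code1, &code2,
                                           &steps, &interm))
         ...

   succeeds on targets with vector unpack support via one intermediate
   step: CODE1/CODE2 are VEC_UNPACK_LO/HI_EXPR, STEPS is 1 and INTERM
   holds the short vector type, matching the char->short->int example
   in the comment above.  */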


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char; in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
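
/* Worked example (illustrative only; the vector type names are
   hypothetical): the mirror-image query for a multi-step int->char
   narrowing

       enum tree_code code1;
       int steps;
       vec<tree> interm = vNULL;

       if (supportable_narrowing_operation (NOP_EXPR, v16qi_type,
                                            v4si_type, &code1, &steps,
                                            &interm))
         ...

   comes back with CODE1 == VEC_PACK_TRUNC_EXPR, STEPS == 1 and the
   short vector type as the single intermediate: four V4SI vectors are
   packed pairwise into two V8HI vectors, which are in turn packed
   into one V16QI vector.  */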