/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
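
/* Usage illustration (a sketch mirroring how this file itself calls the
   function, e.g. in vect_model_simple_cost):

       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					  stmt_info, 0, vect_prologue);

   With a non-NULL cost vector the statement is only queued, and the return
   value is the estimate builtin_vectorization_cost (kind, vectype, misalign)
   scaled by COUNT; with a NULL cost vector the cost is handed directly to
   the target via add_stmt_cost.  */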

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
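
/* For illustration, the helpers above cooperate as follows (the SSA names
   are invented for the example).  Given a "vect_array" temporary created by
   create_vector_array, the emitted GIMPLE looks like

       vect_array[2] = vect_x_1;   <-- write_vector_array (..., vect_x_1, array, 2)
       vect_y_2 = vect_array[2];   <-- read_vector_array (..., array, 2)

   while create_array_ref yields the MEM_REF *ptr through which the array
   aliases the original data references of the group.  */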

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in which case the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is outside of any pattern; if LHS also has uses that
	     are pattern uses, we should mark the stmt itself, and not the
	     pattern stmt.  */
	  if (TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
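
/* For illustration, in

       for (i = 0; i < n; i++)
	 {
	   a[i] = b[i] + 1;   <-- has a vdef: relevant
	   s += b[i];         <-- s used after the loop: live
	 }

   the store is marked vect_used_in_scope because it alters memory, and the
   summation is marked live because its result escapes through the loop-exit
   phi (loop-closed SSA form).  */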

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
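
/* For illustration: given the store "a[i_1] = x_2" (form -1- above), the
   function returns true for USE == x_2, the stored value itself, but false
   for USE == i_1, which only indexes the array; defs that merely feed
   address computations need no vector def of their own.  */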

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		     vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	      || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		     vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.");
	      worklist.release ();
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.");

	      worklist.release ();
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.");

	      worklist.release ();
	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      {
		worklist.release ();
		return false;
	      }
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    {
	      worklist.release ();
	      return false;
	    }
	}
    } /* while worklist */

  worklist.release ();
  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .", inside_cost, prologue_cost);
}
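
/* Worked example (assuming vect_pow2 (N) computes 2**N): a two-step
   promotion (PWR == 1) accumulates vect_pow2 (1) + vect_pow2 (2) = 6
   vec_promote_demote stmts, while a two-step demotion accumulates
   vect_pow2 (0) + vect_pow2 (1) = 3, matching a widening tree that turns
   one vector into four wider ones and a narrowing tree that combines four
   vectors into one.  */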

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
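
/* For illustration: for an interleaved group of three stores
   a[3*i] = x, a[3*i+1] = y, a[3*i+2] = z, the first store reports a group
   size of 3 and the other two report 1, so the group-wide overhead is
   costed exactly once.  */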

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .", inside_cost, prologue_cost);
}
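
/* Worked example: with NCOPIES == 1 and a permuted group of
   GROUP_SIZE == 4 stores, the count above is
   1 * exact_log2 (4) * 4 = 8 vec_perm stmts: exact_log2 (GROUP_SIZE)
   interleave stages, each charged as GROUP_SIZE permute operations.  */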

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .", inside_cost, prologue_cost);
}
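
/* Worked example for the strided-load branch above: for a vectype with
   TYPE_VECTOR_SUBPARTS == 4 and NCOPIES == 2, the model charges
   2 * 4 = 8 scalar_load stmts plus 2 vec_construct stmts to assemble the
   loaded elements into vectors.  */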

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
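
/* For illustration (the SSA name is invented): called with the integer
   constant 5 and a four-element integer vector type, the function emits

       cst__1 = { 5, 5, 5, 5 };

   at *GSI if GSI is non-NULL, otherwise in the loop preheader, and returns
   cst__1.  */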

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def = ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, "  def_stmt = ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
			VS1.1:  vx.1 = memref1      VS1.2
			VS1.2:  vx.2 = memref2      VS1.3
			VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
			VSnew.1:  vz1 = vx.1 + ...  VSnew.2
			VSnew.2:  vz2 = vx.2 + ...  VSnew.3
			VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
1547
1548
1549/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1550 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a
IR
1551
1552static void
b8698a0f 1553vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1554 vec<tree> *vec_oprnds0,
1555 vec<tree> *vec_oprnds1)
ebfd146a 1556{
9771b263 1557 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1558
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1560 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1561
9771b263 1562 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1563 {
9771b263 1564 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1565 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1566 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1567 }
1568}
1569
1570
d092494c
IR
1571/* Get vectorized definitions for OP0 and OP1.
1572 REDUC_INDEX is the index of reduction operand in case of reduction,
1573 and -1 otherwise. */
ebfd146a 1574
d092494c 1575void
ebfd146a 1576vect_get_vec_defs (tree op0, tree op1, gimple stmt,
9771b263
DN
1577 vec<tree> *vec_oprnds0,
1578 vec<tree> *vec_oprnds1,
d092494c 1579 slp_tree slp_node, int reduc_index)
ebfd146a
IR
1580{
1581 if (slp_node)
d092494c
IR
1582 {
1583 int nops = (op1 == NULL_TREE) ? 1 : 2;
9771b263
DN
1584 vec<tree> ops;
1585 ops.create (nops);
37b5ec8f 1586 vec<vec<tree> > vec_defs;
9771b263 1587 vec_defs.create (nops);
d092494c 1588
9771b263 1589 ops.quick_push (op0);
d092494c 1590 if (op1)
9771b263 1591 ops.quick_push (op1);
d092494c
IR
1592
1593 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1594
37b5ec8f 1595 *vec_oprnds0 = vec_defs[0];
d092494c 1596 if (op1)
37b5ec8f 1597 *vec_oprnds1 = vec_defs[1];
d092494c 1598
9771b263
DN
1599 ops.release ();
1600 vec_defs.release ();
d092494c 1601 }
ebfd146a
IR
1602 else
1603 {
1604 tree vec_oprnd;
1605
9771b263 1606 vec_oprnds0->create (1);
b8698a0f 1607 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1608 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1609
1610 if (op1)
1611 {
9771b263 1612 vec_oprnds1->create (1);
b8698a0f 1613 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1614 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1615 }
1616 }
1617}
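
/* Usage sketch, mirroring the vectorizable_* transform loops below:
   the first unrolled copy gets fresh vector defs, and every later
   copy advances the def chain:

     if (j == 0)
       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                          slp_node, -1);
     else
       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);  */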
1618
1619
1620/* Function vect_finish_stmt_generation.
1621
1622 Insert a new stmt. */
1623
1624void
1625vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1626 gimple_stmt_iterator *gsi)
1627{
1628 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1629 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1630 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a
IR
1631
1632 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1633
54e8e2c3
RG
1634 if (!gsi_end_p (*gsi)
1635 && gimple_has_mem_ops (vec_stmt))
1636 {
1637 gimple at_stmt = gsi_stmt (*gsi);
1638 tree vuse = gimple_vuse (at_stmt);
1639 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1640 {
1641 tree vdef = gimple_vdef (at_stmt);
1642 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643 /* If we have an SSA vuse and insert a store, update virtual
1644 SSA form to avoid triggering the renamer. Do so only
1645 if we can easily see all uses - which is what almost always
1646 happens with the way vectorized stmts are inserted. */
1647 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1648 && ((is_gimple_assign (vec_stmt)
1649 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1650 || (is_gimple_call (vec_stmt)
1651 && !(gimple_call_flags (vec_stmt)
1652 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1653 {
1654 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1655 gimple_set_vdef (vec_stmt, new_vdef);
1656 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1657 }
1658 }
1659 }
ebfd146a
IR
1660 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1661
b8698a0f 1662 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1663 bb_vinfo));
ebfd146a 1664
73fbfcad 1665 if (dump_enabled_p ())
ebfd146a 1666 {
78c60e3d
SS
1667 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1669 }
1670
ad885386 1671 gimple_set_location (vec_stmt, gimple_location (stmt));
ebfd146a
IR
1672}
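
/* Illustration of the virtual operand update above (dump-style
   sketch): inserting a vector store before a scalar store that used
   .MEM_5 gives the new stmt that vuse and a fresh vdef, and rewires
   the old store to the new vdef:

     # .MEM_7 = VDEF <.MEM_5>      <-- newly inserted vector store
     MEM[(vector(4) int *)vectp_a] = vect_x;
     # .MEM_6 = VDEF <.MEM_7>      <-- pre-existing stmt at *GSI
     a[i] = x;  */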
1673
1674/* Checks if CALL can be vectorized in type VECTYPE. Returns
1675 a function declaration if the target has a vectorized version
1676 of the function, or NULL_TREE if the function cannot be vectorized. */
1677
1678tree
1679vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1680{
1681 tree fndecl = gimple_call_fndecl (call);
ebfd146a
IR
1682
1683 /* We only handle functions that do not read or clobber memory -- i.e.
1684 const or novops ones. */
1685 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1686 return NULL_TREE;
1687
1688 if (!fndecl
1689 || TREE_CODE (fndecl) != FUNCTION_DECL
1690 || !DECL_BUILT_IN (fndecl))
1691 return NULL_TREE;
1692
62f7fd21 1693 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
ebfd146a
IR
1694 vectype_in);
1695}
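
/* For example (target-dependent sketch): for a statement
     y = sqrt (x);
   vectorized with V2DF in and out, an x86 target's hook can return
   the decl of __builtin_ia32_sqrtpd, and the transformed loop then
   calls that builtin on each vector of two doubles.  */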
1696
1697/* Function vectorizable_call.
1698
b8698a0f
L
1699 Check if STMT performs a function call that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1701 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1703
1704static bool
190c2236
JJ
1705vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1706 slp_tree slp_node)
ebfd146a
IR
1707{
1708 tree vec_dest;
1709 tree scalar_dest;
1710 tree op, type;
1711 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1713 tree vectype_out, vectype_in;
1714 int nunits_in;
1715 int nunits_out;
1716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 1717 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 1718 tree fndecl, new_temp, def, rhs_type;
ebfd146a 1719 gimple def_stmt;
0502fb85
UB
1720 enum vect_def_type dt[3]
1721 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 1722 gimple new_stmt = NULL;
ebfd146a 1723 int ncopies, j;
6e1aa848 1724 vec<tree> vargs = vNULL;
ebfd146a
IR
1725 enum { NARROW, NONE, WIDEN } modifier;
1726 size_t i, nargs;
9d5e7640 1727 tree lhs;
ebfd146a 1728
190c2236 1729 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
1730 return false;
1731
8644a673 1732 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1733 return false;
1734
ebfd146a
IR
1735 /* Is STMT a vectorizable call? */
1736 if (!is_gimple_call (stmt))
1737 return false;
1738
1739 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1740 return false;
1741
822ba6d7 1742 if (stmt_can_throw_internal (stmt))
5a2c1986
IR
1743 return false;
1744
b690cc0f
RG
1745 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1746
ebfd146a
IR
1747 /* Process function arguments. */
1748 rhs_type = NULL_TREE;
b690cc0f 1749 vectype_in = NULL_TREE;
ebfd146a
IR
1750 nargs = gimple_call_num_args (stmt);
1751
1b1562a5
MM
1752 /* Bail out if the function has more than three arguments; we do not have
1753 interesting builtin functions to vectorize with more than two arguments,
1754 except for fma. A call with no arguments is not vectorizable either. */
1755 if (nargs == 0 || nargs > 3)
ebfd146a
IR
1756 return false;
1757
1758 for (i = 0; i < nargs; i++)
1759 {
b690cc0f
RG
1760 tree opvectype;
1761
ebfd146a
IR
1762 op = gimple_call_arg (stmt, i);
1763
1764 /* We can only handle calls with arguments of the same type. */
1765 if (rhs_type
8533c9d8 1766 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 1767 {
73fbfcad 1768 if (dump_enabled_p ())
78c60e3d
SS
1769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1770 "argument types differ.");
ebfd146a
IR
1771 return false;
1772 }
b690cc0f
RG
1773 if (!rhs_type)
1774 rhs_type = TREE_TYPE (op);
ebfd146a 1775
24ee1384 1776 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 1777 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 1778 {
73fbfcad 1779 if (dump_enabled_p ())
78c60e3d
SS
1780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1781 "use not simple.");
ebfd146a
IR
1782 return false;
1783 }
ebfd146a 1784
b690cc0f
RG
1785 if (!vectype_in)
1786 vectype_in = opvectype;
1787 else if (opvectype
1788 && opvectype != vectype_in)
1789 {
73fbfcad 1790 if (dump_enabled_p ())
78c60e3d
SS
1791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1792 "argument vector types differ.");
b690cc0f
RG
1793 return false;
1794 }
1795 }
1796 /* If all arguments are external or constant defs, use a vector type with
1797 the same size as the output vector type. */
ebfd146a 1798 if (!vectype_in)
b690cc0f 1799 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
1800 if (vec_stmt)
1801 gcc_assert (vectype_in);
1802 if (!vectype_in)
1803 {
73fbfcad 1804 if (dump_enabled_p ())
7d8930a0 1805 {
78c60e3d
SS
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "no vectype for scalar type ");
1808 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
7d8930a0
IR
1809 }
1810
1811 return false;
1812 }
ebfd146a
IR
1813
1814 /* FORNOW */
b690cc0f
RG
1815 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1816 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
1817 if (nunits_in == nunits_out / 2)
1818 modifier = NARROW;
1819 else if (nunits_out == nunits_in)
1820 modifier = NONE;
1821 else if (nunits_out == nunits_in / 2)
1822 modifier = WIDEN;
1823 else
1824 return false;
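
 /* Worked example: a call taking V2DF arguments (nunits_in == 2) and
    producing a V4SI result (nunits_out == 4) satisfies
    nunits_in == nunits_out / 2, so modifier == NARROW: the NARROW
    case below consumes two vectors of each argument per result
    vector.  */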
1825
1826 /* For now, we only vectorize functions if a target specific builtin
1827 is available. TODO -- in some cases, it might be profitable to
1828 insert the calls for pieces of the vector, in order to be able
1829 to vectorize other operations in the loop. */
1830 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1831 if (fndecl == NULL_TREE)
1832 {
73fbfcad 1833 if (dump_enabled_p ())
78c60e3d
SS
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "function is not vectorizable.");
ebfd146a
IR
1836
1837 return false;
1838 }
1839
5006671f 1840 gcc_assert (!gimple_vuse (stmt));
ebfd146a 1841
190c2236
JJ
1842 if (slp_node || PURE_SLP_STMT (stmt_info))
1843 ncopies = 1;
1844 else if (modifier == NARROW)
ebfd146a
IR
1845 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1846 else
1847 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1848
1849 /* Sanity check: make sure that at least one copy of the vectorized stmt
1850 needs to be generated. */
1851 gcc_assert (ncopies >= 1);
1852
1853 if (!vec_stmt) /* transformation not required. */
1854 {
1855 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 1856 if (dump_enabled_p ())
78c60e3d 1857 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
c3e7ee41 1858 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
1859 return true;
1860 }
1861
1862 /** Transform. **/
1863
73fbfcad 1864 if (dump_enabled_p ())
78c60e3d 1865 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
ebfd146a
IR
1866
1867 /* Handle def. */
1868 scalar_dest = gimple_call_lhs (stmt);
1869 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1870
1871 prev_stmt_info = NULL;
1872 switch (modifier)
1873 {
1874 case NONE:
1875 for (j = 0; j < ncopies; ++j)
1876 {
1877 /* Build argument list for the vectorized call. */
1878 if (j == 0)
9771b263 1879 vargs.create (nargs);
ebfd146a 1880 else
9771b263 1881 vargs.truncate (0);
ebfd146a 1882
190c2236
JJ
1883 if (slp_node)
1884 {
37b5ec8f 1885 vec<vec<tree> > vec_defs;
9771b263
DN
1886 vec_defs.create (nargs);
1887 vec<tree> vec_oprnds0;
190c2236
JJ
1888
1889 for (i = 0; i < nargs; i++)
9771b263 1890 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 1891 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 1892 vec_oprnds0 = vec_defs[0];
190c2236
JJ
1893
1894 /* Arguments are ready. Create the new vector stmt. */
9771b263 1895 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
1896 {
1897 size_t k;
1898 for (k = 0; k < nargs; k++)
1899 {
37b5ec8f 1900 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 1901 vargs[k] = vec_oprndsk[i];
190c2236
JJ
1902 }
1903 new_stmt = gimple_build_call_vec (fndecl, vargs);
1904 new_temp = make_ssa_name (vec_dest, new_stmt);
1905 gimple_call_set_lhs (new_stmt, new_temp);
1906 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 1907 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
1908 }
1909
1910 for (i = 0; i < nargs; i++)
1911 {
37b5ec8f 1912 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 1913 vec_oprndsi.release ();
190c2236 1914 }
9771b263 1915 vec_defs.release ();
190c2236
JJ
1916 continue;
1917 }
1918
ebfd146a
IR
1919 for (i = 0; i < nargs; i++)
1920 {
1921 op = gimple_call_arg (stmt, i);
1922 if (j == 0)
1923 vec_oprnd0
1924 = vect_get_vec_def_for_operand (op, stmt, NULL);
1925 else
63827fb8
IR
1926 {
1927 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1928 vec_oprnd0
1929 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1930 }
ebfd146a 1931
9771b263 1932 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
1933 }
1934
1935 new_stmt = gimple_build_call_vec (fndecl, vargs);
1936 new_temp = make_ssa_name (vec_dest, new_stmt);
1937 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
1938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1939
1940 if (j == 0)
1941 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1942 else
1943 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1944
1945 prev_stmt_info = vinfo_for_stmt (new_stmt);
1946 }
1947
1948 break;
1949
1950 case NARROW:
1951 for (j = 0; j < ncopies; ++j)
1952 {
1953 /* Build argument list for the vectorized call. */
1954 if (j == 0)
9771b263 1955 vargs.create (nargs * 2);
ebfd146a 1956 else
9771b263 1957 vargs.truncate (0);
ebfd146a 1958
190c2236
JJ
1959 if (slp_node)
1960 {
37b5ec8f 1961 vec<vec<tree> > vec_defs;
9771b263
DN
1962 vec_defs.create (nargs);
1963 vec<tree> vec_oprnds0;
190c2236
JJ
1964
1965 for (i = 0; i < nargs; i++)
9771b263 1966 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 1967 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 1968 vec_oprnds0 = vec_defs[0];
190c2236
JJ
1969
1970 /* Arguments are ready. Create the new vector stmt. */
9771b263 1971 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
1972 {
1973 size_t k;
9771b263 1974 vargs.truncate (0);
190c2236
JJ
1975 for (k = 0; k < nargs; k++)
1976 {
37b5ec8f 1977 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
1978 vargs.quick_push (vec_oprndsk[i]);
1979 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236
JJ
1980 }
1981 new_stmt = gimple_build_call_vec (fndecl, vargs);
1982 new_temp = make_ssa_name (vec_dest, new_stmt);
1983 gimple_call_set_lhs (new_stmt, new_temp);
1984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 1985 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
1986 }
1987
1988 for (i = 0; i < nargs; i++)
1989 {
37b5ec8f 1990 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 1991 vec_oprndsi.release ();
190c2236 1992 }
9771b263 1993 vec_defs.release ();
190c2236
JJ
1994 continue;
1995 }
1996
ebfd146a
IR
1997 for (i = 0; i < nargs; i++)
1998 {
1999 op = gimple_call_arg (stmt, i);
2000 if (j == 0)
2001 {
2002 vec_oprnd0
2003 = vect_get_vec_def_for_operand (op, stmt, NULL);
2004 vec_oprnd1
63827fb8 2005 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2006 }
2007 else
2008 {
336ecb65 2009 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2010 vec_oprnd0
63827fb8 2011 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2012 vec_oprnd1
63827fb8 2013 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2014 }
2015
9771b263
DN
2016 vargs.quick_push (vec_oprnd0);
2017 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
2018 }
2019
2020 new_stmt = gimple_build_call_vec (fndecl, vargs);
2021 new_temp = make_ssa_name (vec_dest, new_stmt);
2022 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
2023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2024
2025 if (j == 0)
2026 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2027 else
2028 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2029
2030 prev_stmt_info = vinfo_for_stmt (new_stmt);
2031 }
2032
2033 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2034
2035 break;
2036
2037 case WIDEN:
2038 /* No current target implements this case. */
2039 return false;
2040 }
2041
9771b263 2042 vargs.release ();
ebfd146a
IR
2043
2044 /* Update the exception handling table with the vector stmt if necessary. */
2045 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2046 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2047
2048 /* The call in STMT might prevent it from being removed in dce.
2049 We however cannot remove it here, due to the way the ssa name
2050 it defines is mapped to the new definition. So just replace the
2051 rhs of the statement with something harmless. */
2052
dd34c087
JJ
2053 if (slp_node)
2054 return true;
2055
ebfd146a 2056 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
2057 if (is_pattern_stmt_p (stmt_info))
2058 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2059 else
2060 lhs = gimple_call_lhs (stmt);
2061 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2062 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2063 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
2064 STMT_VINFO_STMT (stmt_info) = new_stmt;
2065 gsi_replace (gsi, new_stmt, false);
2066 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2067
2068 return true;
2069}
2070
2071
2072/* Function vect_gen_widened_results_half
2073
2074 Create a vector stmt whose code, operand count, and result
b8698a0f 2075 variable are given by CODE, OP_TYPE, and VEC_DEST, and whose arguments are
ff802fa1 2076 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
2077 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2078 needs to be created (DECL is a function-decl of a target-builtin).
2079 STMT is the original scalar stmt that we are vectorizing. */
2080
2081static gimple
2082vect_gen_widened_results_half (enum tree_code code,
2083 tree decl,
2084 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2085 tree vec_dest, gimple_stmt_iterator *gsi,
2086 gimple stmt)
b8698a0f 2087{
ebfd146a 2088 gimple new_stmt;
b8698a0f
L
2089 tree new_temp;
2090
2091 /* Generate half of the widened result: */
2092 if (code == CALL_EXPR)
2093 {
2094 /* Target specific support */
ebfd146a
IR
2095 if (op_type == binary_op)
2096 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2097 else
2098 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2099 new_temp = make_ssa_name (vec_dest, new_stmt);
2100 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
2101 }
2102 else
ebfd146a 2103 {
b8698a0f
L
2104 /* Generic support */
2105 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
2106 if (op_type != binary_op)
2107 vec_oprnd1 = NULL;
2108 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2109 vec_oprnd1);
2110 new_temp = make_ssa_name (vec_dest, new_stmt);
2111 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 2112 }
ebfd146a
IR
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114
ebfd146a
IR
2115 return new_stmt;
2116}
2117
4a00c761
JJ
2118
2119/* Get vectorized definitions for loop-based vectorization. For the first
2120 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2121 the scalar operand), and for the rest we get a copy with
2122 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2123 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2124 The vectors are collected into VEC_OPRNDS. */
2125
2126static void
2127vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
9771b263 2128 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
2129{
2130 tree vec_oprnd;
2131
2132 /* Get first vector operand. */
2133 /* All the vector operands except the very first one (that is scalar oprnd)
2134 are stmt copies. */
2135 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2136 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2137 else
2138 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2139
9771b263 2140 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
2141
2142 /* Get second vector operand. */
2143 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 2144 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
2145
2146 *oprnd = vec_oprnd;
2147
2148 /* For conversion in multiple steps, continue to get operands
2149 recursively. */
2150 if (multi_step_cvt)
2151 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2152}
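
/* E.g. (sketch): called with MULTI_STEP_CVT == 1 this recurses once
   and collects four defs -- vx.0, vx.1, vx.2, vx.3 -- two per level;
   the NARROW case of vectorizable_conversion sizes the chain by
   passing vect_pow2 (multi_step_cvt) - 1.  */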
2153
2154
2155/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2156 For multi-step conversions store the resulting vectors and call the function
2157 recursively. */
2158
2159static void
9771b263 2160vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4a00c761 2161 int multi_step_cvt, gimple stmt,
9771b263 2162 vec<tree> vec_dsts,
4a00c761
JJ
2163 gimple_stmt_iterator *gsi,
2164 slp_tree slp_node, enum tree_code code,
2165 stmt_vec_info *prev_stmt_info)
2166{
2167 unsigned int i;
2168 tree vop0, vop1, new_tmp, vec_dest;
2169 gimple new_stmt;
2170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171
9771b263 2172 vec_dest = vec_dsts.pop ();
4a00c761 2173
9771b263 2174 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
2175 {
2176 /* Create demotion operation. */
9771b263
DN
2177 vop0 = (*vec_oprnds)[i];
2178 vop1 = (*vec_oprnds)[i + 1];
4a00c761
JJ
2179 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2180 new_tmp = make_ssa_name (vec_dest, new_stmt);
2181 gimple_assign_set_lhs (new_stmt, new_tmp);
2182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2183
2184 if (multi_step_cvt)
2185 /* Store the resulting vector for next recursive call. */
9771b263 2186 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
2187 else
2188 {
2189 /* This is the last step of the conversion sequence. Store the
2190 vectors in SLP_NODE or in vector info of the scalar statement
2191 (or in STMT_VINFO_RELATED_STMT chain). */
2192 if (slp_node)
9771b263 2193 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
2194 else
2195 {
2196 if (!*prev_stmt_info)
2197 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2198 else
2199 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2200
2201 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2202 }
2203 }
2204 }
2205
2206 /* For multi-step demotion operations we first generate demotion operations
2207 from the source type to the intermediate types, and then combine the
2208 results (stored in VEC_OPRNDS) in demotion operation to the destination
2209 type. */
2210 if (multi_step_cvt)
2211 {
2212 /* At each level of recursion we have half of the operands we had at the
2213 previous level. */
9771b263 2214 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
2215 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2216 stmt, vec_dsts, gsi, slp_node,
2217 VEC_PACK_TRUNC_EXPR,
2218 prev_stmt_info);
2219 }
2220
9771b263 2221 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
2222}
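
/* E.g. (sketch): demoting int to char on 128-bit vectors takes two
   VEC_PACK_TRUNC_EXPR levels; the loop above emits the first level
   and the recursive call emits the final pack:

     v8hi_0  = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
     v8hi_1  = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;
     v16qi_0 = VEC_PACK_TRUNC_EXPR <v8hi_0, v8hi_1>;  */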
2223
2224
2225/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2226 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2227 the resulting vectors and call the function recursively. */
2228
2229static void
9771b263
DN
2230vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2231 vec<tree> *vec_oprnds1,
4a00c761
JJ
2232 gimple stmt, tree vec_dest,
2233 gimple_stmt_iterator *gsi,
2234 enum tree_code code1,
2235 enum tree_code code2, tree decl1,
2236 tree decl2, int op_type)
2237{
2238 int i;
2239 tree vop0, vop1, new_tmp1, new_tmp2;
2240 gimple new_stmt1, new_stmt2;
6e1aa848 2241 vec<tree> vec_tmp = vNULL;
4a00c761 2242
9771b263
DN
2243 vec_tmp.create (vec_oprnds0->length () * 2);
2244 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
2245 {
2246 if (op_type == binary_op)
9771b263 2247 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
2248 else
2249 vop1 = NULL_TREE;
2250
2251 /* Generate the two halves of promotion operation. */
2252 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2253 op_type, vec_dest, gsi, stmt);
2254 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2255 op_type, vec_dest, gsi, stmt);
2256 if (is_gimple_call (new_stmt1))
2257 {
2258 new_tmp1 = gimple_call_lhs (new_stmt1);
2259 new_tmp2 = gimple_call_lhs (new_stmt2);
2260 }
2261 else
2262 {
2263 new_tmp1 = gimple_assign_lhs (new_stmt1);
2264 new_tmp2 = gimple_assign_lhs (new_stmt2);
2265 }
2266
2267 /* Store the results for the next step. */
9771b263
DN
2268 vec_tmp.quick_push (new_tmp1);
2269 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
2270 }
2271
689eaba3 2272 vec_oprnds0->release ();
4a00c761
JJ
2273 *vec_oprnds0 = vec_tmp;
2274}
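
/* E.g. (sketch): a unary widening of one V8HI operand yields two
   V4SI results, one per half of the input vector:

     v4si_lo = [vec_unpack_lo_expr] v8hi_x;
     v4si_hi = [vec_unpack_hi_expr] v8hi_x;  */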
2275
2276
b8698a0f
L
2277/* Check if STMT performs a conversion operation, that can be vectorized.
2278 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 2279 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
2280 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2281
2282static bool
2283vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2284 gimple *vec_stmt, slp_tree slp_node)
2285{
2286 tree vec_dest;
2287 tree scalar_dest;
4a00c761 2288 tree op0, op1 = NULL_TREE;
ebfd146a
IR
2289 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2292 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 2293 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
2294 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2295 tree new_temp;
2296 tree def;
2297 gimple def_stmt;
2298 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2299 gimple new_stmt = NULL;
2300 stmt_vec_info prev_stmt_info;
2301 int nunits_in;
2302 int nunits_out;
2303 tree vectype_out, vectype_in;
4a00c761
JJ
2304 int ncopies, i, j;
2305 tree lhs_type, rhs_type;
ebfd146a 2306 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
2307 vec<tree> vec_oprnds0 = vNULL;
2308 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 2309 tree vop0;
4a00c761
JJ
2310 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2311 int multi_step_cvt = 0;
6e1aa848
DN
2312 vec<tree> vec_dsts = vNULL;
2313 vec<tree> interm_types = vNULL;
4a00c761
JJ
2314 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2315 int op_type;
2316 enum machine_mode rhs_mode;
2317 unsigned short fltsz;
ebfd146a
IR
2318
2319 /* Is STMT a vectorizable conversion? */
2320
4a00c761 2321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2322 return false;
2323
8644a673 2324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2325 return false;
2326
2327 if (!is_gimple_assign (stmt))
2328 return false;
2329
2330 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2331 return false;
2332
2333 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
2334 if (!CONVERT_EXPR_CODE_P (code)
2335 && code != FIX_TRUNC_EXPR
2336 && code != FLOAT_EXPR
2337 && code != WIDEN_MULT_EXPR
2338 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
2339 return false;
2340
4a00c761
JJ
2341 op_type = TREE_CODE_LENGTH (code);
2342
ebfd146a 2343 /* Check types of lhs and rhs. */
b690cc0f 2344 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 2345 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
2346 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2347
ebfd146a
IR
2348 op0 = gimple_assign_rhs1 (stmt);
2349 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
2350
2351 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2352 && !((INTEGRAL_TYPE_P (lhs_type)
2353 && INTEGRAL_TYPE_P (rhs_type))
2354 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2355 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2356 return false;
2357
2358 if ((INTEGRAL_TYPE_P (lhs_type)
2359 && (TYPE_PRECISION (lhs_type)
2360 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2361 || (INTEGRAL_TYPE_P (rhs_type)
2362 && (TYPE_PRECISION (rhs_type)
2363 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2364 {
73fbfcad 2365 if (dump_enabled_p ())
78c60e3d
SS
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 "type conversion to/from bit-precision unsupported.");
4a00c761
JJ
2368 return false;
2369 }
2370
b690cc0f 2371 /* Check the operands of the operation. */
24ee1384 2372 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f
RG
2373 &def_stmt, &def, &dt[0], &vectype_in))
2374 {
73fbfcad 2375 if (dump_enabled_p ())
78c60e3d
SS
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "use not simple.");
b690cc0f
RG
2378 return false;
2379 }
4a00c761
JJ
2380 if (op_type == binary_op)
2381 {
2382 bool ok;
2383
2384 op1 = gimple_assign_rhs2 (stmt);
2385 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2386 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2387 OP1. */
2388 if (CONSTANT_CLASS_P (op0))
f5709183 2389 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
4a00c761
JJ
2390 &def_stmt, &def, &dt[1], &vectype_in);
2391 else
f5709183 2392 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 2393 &def, &dt[1]);
4a00c761
JJ
2394
2395 if (!ok)
2396 {
73fbfcad 2397 if (dump_enabled_p ())
78c60e3d
SS
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "use not simple.");
4a00c761
JJ
2400 return false;
2401 }
2402 }
2403
b690cc0f
RG
2404 /* If op0 is an external or constant def, use a vector type of
2405 the same size as the output vector type. */
ebfd146a 2406 if (!vectype_in)
b690cc0f 2407 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2408 if (vec_stmt)
2409 gcc_assert (vectype_in);
2410 if (!vectype_in)
2411 {
73fbfcad 2412 if (dump_enabled_p ())
4a00c761 2413 {
78c60e3d
SS
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415 "no vectype for scalar type ");
2416 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4a00c761 2417 }
7d8930a0
IR
2418
2419 return false;
2420 }
ebfd146a 2421
b690cc0f
RG
2422 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2423 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 2424 if (nunits_in < nunits_out)
ebfd146a
IR
2425 modifier = NARROW;
2426 else if (nunits_out == nunits_in)
2427 modifier = NONE;
ebfd146a 2428 else
4a00c761 2429 modifier = WIDEN;
ebfd146a 2430
ff802fa1
IR
2431 /* Multiple types in SLP are handled by creating the appropriate number of
2432 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2433 case of SLP. */
437f4a00 2434 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 2435 ncopies = 1;
4a00c761
JJ
2436 else if (modifier == NARROW)
2437 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2438 else
2439 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 2440
ebfd146a
IR
2441 /* Sanity check: make sure that at least one copy of the vectorized stmt
2442 needs to be generated. */
2443 gcc_assert (ncopies >= 1);
2444
ebfd146a 2445 /* Supportable by target? */
4a00c761 2446 switch (modifier)
ebfd146a 2447 {
4a00c761
JJ
2448 case NONE:
2449 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2450 return false;
2451 if (supportable_convert_operation (code, vectype_out, vectype_in,
2452 &decl1, &code1))
2453 break;
2454 /* FALLTHRU */
2455 unsupported:
73fbfcad 2456 if (dump_enabled_p ())
78c60e3d
SS
2457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458 "conversion not supported by target.");
ebfd146a 2459 return false;
ebfd146a 2460
4a00c761
JJ
2461 case WIDEN:
2462 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
2463 &code1, &code2, &multi_step_cvt,
2464 &interm_types))
4a00c761
JJ
2465 {
2466 /* Binary widening operation can only be supported directly by the
2467 architecture. */
2468 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2469 break;
2470 }
2471
2472 if (code != FLOAT_EXPR
2473 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2474 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2475 goto unsupported;
2476
2477 rhs_mode = TYPE_MODE (rhs_type);
2478 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2479 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2480 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2481 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2482 {
2483 cvt_type
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486 if (cvt_type == NULL_TREE)
2487 goto unsupported;
2488
2489 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2490 {
2491 if (!supportable_convert_operation (code, vectype_out,
2492 cvt_type, &decl1, &codecvt1))
2493 goto unsupported;
2494 }
2495 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
2496 cvt_type, &codecvt1,
2497 &codecvt2, &multi_step_cvt,
4a00c761
JJ
2498 &interm_types))
2499 continue;
2500 else
2501 gcc_assert (multi_step_cvt == 0);
2502
2503 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
2504 vectype_in, &code1, &code2,
2505 &multi_step_cvt, &interm_types))
4a00c761
JJ
2506 break;
2507 }
2508
2509 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2510 goto unsupported;
2511
2512 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2513 codecvt2 = ERROR_MARK;
2514 else
2515 {
2516 multi_step_cvt++;
9771b263 2517 interm_types.safe_push (cvt_type);
4a00c761
JJ
2518 cvt_type = NULL_TREE;
2519 }
2520 break;
2521
2522 case NARROW:
2523 gcc_assert (op_type == unary_op);
2524 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2525 &code1, &multi_step_cvt,
2526 &interm_types))
2527 break;
2528
2529 if (code != FIX_TRUNC_EXPR
2530 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2531 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2532 goto unsupported;
2533
2534 rhs_mode = TYPE_MODE (rhs_type);
2535 cvt_type
2536 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2537 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2538 if (cvt_type == NULL_TREE)
2539 goto unsupported;
2540 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2541 &decl1, &codecvt1))
2542 goto unsupported;
2543 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2544 &code1, &multi_step_cvt,
2545 &interm_types))
2546 break;
2547 goto unsupported;
2548
2549 default:
2550 gcc_unreachable ();
ebfd146a
IR
2551 }
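
 /* E.g. (sketch): FLOAT_EXPR from short to double has no single-step
    support, so the WIDEN analysis above searches for an intermediate
    integer type, typically giving with cvt_type == int:

      int_tmp = (int) short_val;    <-- widening NOP conversion
      dbl = (double) int_tmp;       <-- FLOAT_EXPR, possibly itself
                                        widening  */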
2552
2553 if (!vec_stmt) /* transformation not required. */
2554 {
73fbfcad 2555 if (dump_enabled_p ())
78c60e3d
SS
2556 dump_printf_loc (MSG_NOTE, vect_location,
2557 "=== vectorizable_conversion ===");
4a00c761 2558 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
2559 {
2560 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
c3e7ee41 2561 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
8bd37302 2562 }
4a00c761
JJ
2563 else if (modifier == NARROW)
2564 {
2565 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 2566 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
2567 }
2568 else
2569 {
2570 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 2571 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 2572 }
9771b263 2573 interm_types.release ();
ebfd146a
IR
2574 return true;
2575 }
2576
2577 /** Transform. **/
73fbfcad 2578 if (dump_enabled_p ())
78c60e3d
SS
2579 dump_printf_loc (MSG_NOTE, vect_location,
2580 "transform conversion. ncopies = %d.", ncopies);
ebfd146a 2581
4a00c761
JJ
2582 if (op_type == binary_op)
2583 {
2584 if (CONSTANT_CLASS_P (op0))
2585 op0 = fold_convert (TREE_TYPE (op1), op0);
2586 else if (CONSTANT_CLASS_P (op1))
2587 op1 = fold_convert (TREE_TYPE (op0), op1);
2588 }
2589
2590 /* In case of multi-step conversion, we first generate conversion operations
2591 to the intermediate types, and then from those types to the final one.
2592 We create vector destinations for the intermediate type (TYPES) received
2593 from supportable_*_operation, and store them in the correct order
2594 for future use in vect_create_vectorized_*_stmts (). */
9771b263 2595 vec_dsts.create (multi_step_cvt + 1);
82294ec1
JJ
2596 vec_dest = vect_create_destination_var (scalar_dest,
2597 (cvt_type && modifier == WIDEN)
2598 ? cvt_type : vectype_out);
9771b263 2599 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
2600
2601 if (multi_step_cvt)
2602 {
9771b263
DN
2603 for (i = interm_types.length () - 1;
2604 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
2605 {
2606 vec_dest = vect_create_destination_var (scalar_dest,
2607 intermediate_type);
9771b263 2608 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
2609 }
2610 }
ebfd146a 2611
4a00c761 2612 if (cvt_type)
82294ec1
JJ
2613 vec_dest = vect_create_destination_var (scalar_dest,
2614 modifier == WIDEN
2615 ? vectype_out : cvt_type);
4a00c761
JJ
2616
2617 if (!slp_node)
2618 {
30862efc 2619 if (modifier == WIDEN)
4a00c761 2620 {
9771b263 2621 vec_oprnds0.create (multi_step_cvt ? vect_pow2(multi_step_cvt) : 1);
4a00c761 2622 if (op_type == binary_op)
9771b263 2623 vec_oprnds1.create (1);
4a00c761 2624 }
30862efc 2625 else if (modifier == NARROW)
9771b263
DN
2626 vec_oprnds0.create (
2627 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
2628 }
2629 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 2630 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 2631
4a00c761 2632 last_oprnd = op0;
ebfd146a
IR
2633 prev_stmt_info = NULL;
2634 switch (modifier)
2635 {
2636 case NONE:
2637 for (j = 0; j < ncopies; j++)
2638 {
ebfd146a 2639 if (j == 0)
d092494c
IR
2640 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2641 -1);
ebfd146a
IR
2642 else
2643 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2644
9771b263 2645 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
2646 {
2647 /* Arguments are ready, create the new vector stmt. */
2648 if (code1 == CALL_EXPR)
2649 {
2650 new_stmt = gimple_build_call (decl1, 1, vop0);
2651 new_temp = make_ssa_name (vec_dest, new_stmt);
2652 gimple_call_set_lhs (new_stmt, new_temp);
2653 }
2654 else
2655 {
2656 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2657 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2658 vop0, NULL);
2659 new_temp = make_ssa_name (vec_dest, new_stmt);
2660 gimple_assign_set_lhs (new_stmt, new_temp);
2661 }
2662
2663 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2664 if (slp_node)
9771b263 2665 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
2666 }
2667
ebfd146a
IR
2668 if (j == 0)
2669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2670 else
2671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2672 prev_stmt_info = vinfo_for_stmt (new_stmt);
2673 }
2674 break;
2675
2676 case WIDEN:
2677 /* In case the vectorization factor (VF) is bigger than the number
2678 of elements that we can fit in a vectype (nunits), we have to
2679 generate more than one vector stmt - i.e. - we need to "unroll"
2680 the vector stmt by a factor VF/nunits. */
2681 for (j = 0; j < ncopies; j++)
2682 {
4a00c761 2683 /* Handle uses. */
ebfd146a 2684 if (j == 0)
4a00c761
JJ
2685 {
2686 if (slp_node)
2687 {
2688 if (code == WIDEN_LSHIFT_EXPR)
2689 {
2690 unsigned int k;
ebfd146a 2691
4a00c761
JJ
2692 vec_oprnd1 = op1;
2693 /* Store vec_oprnd1 for every vector stmt to be created
2694 for SLP_NODE. We check during the analysis that all
2695 the shift arguments are the same. */
2696 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 2697 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
2698
2699 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2700 slp_node, -1);
2701 }
2702 else
2703 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2704 &vec_oprnds1, slp_node, -1);
2705 }
2706 else
2707 {
2708 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 2709 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
2710 if (op_type == binary_op)
2711 {
2712 if (code == WIDEN_LSHIFT_EXPR)
2713 vec_oprnd1 = op1;
2714 else
2715 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2716 NULL);
9771b263 2717 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
2718 }
2719 }
2720 }
ebfd146a 2721 else
4a00c761
JJ
2722 {
2723 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
2724 vec_oprnds0.truncate (0);
2725 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
2726 if (op_type == binary_op)
2727 {
2728 if (code == WIDEN_LSHIFT_EXPR)
2729 vec_oprnd1 = op1;
2730 else
2731 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2732 vec_oprnd1);
9771b263
DN
2733 vec_oprnds1.truncate (0);
2734 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
2735 }
2736 }
ebfd146a 2737
4a00c761
JJ
2738 /* Arguments are ready. Create the new vector stmts. */
2739 for (i = multi_step_cvt; i >= 0; i--)
2740 {
9771b263 2741 tree this_dest = vec_dsts[i];
4a00c761
JJ
2742 enum tree_code c1 = code1, c2 = code2;
2743 if (i == 0 && codecvt2 != ERROR_MARK)
2744 {
2745 c1 = codecvt1;
2746 c2 = codecvt2;
2747 }
2748 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2749 &vec_oprnds1,
2750 stmt, this_dest, gsi,
2751 c1, c2, decl1, decl2,
2752 op_type);
2753 }
2754
9771b263 2755 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
2756 {
2757 if (cvt_type)
2758 {
2759 if (codecvt1 == CALL_EXPR)
2760 {
2761 new_stmt = gimple_build_call (decl1, 1, vop0);
2762 new_temp = make_ssa_name (vec_dest, new_stmt);
2763 gimple_call_set_lhs (new_stmt, new_temp);
2764 }
2765 else
2766 {
2767 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2768 new_temp = make_ssa_name (vec_dest, NULL);
2769 new_stmt = gimple_build_assign_with_ops (codecvt1,
2770 new_temp,
2771 vop0, NULL);
2772 }
2773
2774 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2775 }
2776 else
2777 new_stmt = SSA_NAME_DEF_STMT (vop0);
2778
2779 if (slp_node)
9771b263 2780 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
2781 else
2782 {
2783 if (!prev_stmt_info)
2784 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2785 else
2786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2787 prev_stmt_info = vinfo_for_stmt (new_stmt);
2788 }
2789 }
ebfd146a 2790 }
4a00c761
JJ
2791
2792 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
2793 break;
2794
2795 case NARROW:
2796 /* In case the vectorization factor (VF) is bigger than the number
2797 of elements that we can fit in a vectype (nunits), we have to
2798 generate more than one vector stmt - i.e. - we need to "unroll"
2799 the vector stmt by a factor VF/nunits. */
2800 for (j = 0; j < ncopies; j++)
2801 {
2802 /* Handle uses. */
4a00c761
JJ
2803 if (slp_node)
2804 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2805 slp_node, -1);
ebfd146a
IR
2806 else
2807 {
9771b263 2808 vec_oprnds0.truncate (0);
4a00c761
JJ
2809 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2810 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
2811 }
2812
4a00c761
JJ
2813 /* Arguments are ready. Create the new vector stmts. */
2814 if (cvt_type)
9771b263 2815 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
2816 {
2817 if (codecvt1 == CALL_EXPR)
2818 {
2819 new_stmt = gimple_build_call (decl1, 1, vop0);
2820 new_temp = make_ssa_name (vec_dest, new_stmt);
2821 gimple_call_set_lhs (new_stmt, new_temp);
2822 }
2823 else
2824 {
2825 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2826 new_temp = make_ssa_name (vec_dest, NULL);
2827 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2828 vop0, NULL);
2829 }
ebfd146a 2830
4a00c761 2831 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2832 vec_oprnds0[i] = new_temp;
4a00c761 2833 }
ebfd146a 2834
4a00c761
JJ
2835 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2836 stmt, vec_dsts, gsi,
2837 slp_node, code1,
2838 &prev_stmt_info);
ebfd146a
IR
2839 }
2840
2841 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 2842 break;
ebfd146a
IR
2843 }
2844
9771b263
DN
2845 vec_oprnds0.release ();
2846 vec_oprnds1.release ();
2847 vec_dsts.release ();
2848 interm_types.release ();
ebfd146a
IR
2849
2850 return true;
2851}
ff802fa1
IR
2852
2853
ebfd146a
IR
2854/* Function vectorizable_assignment.
2855
b8698a0f
L
2856 Check if STMT performs an assignment (copy) that can be vectorized.
2857 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2858 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2859 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2860
2861static bool
2862vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2863 gimple *vec_stmt, slp_tree slp_node)
2864{
2865 tree vec_dest;
2866 tree scalar_dest;
2867 tree op;
2868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2869 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2870 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2871 tree new_temp;
2872 tree def;
2873 gimple def_stmt;
2874 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 2875 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 2876 int ncopies;
f18b55bd 2877 int i, j;
6e1aa848 2878 vec<tree> vec_oprnds = vNULL;
ebfd146a 2879 tree vop;
a70d6342 2880 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
2881 gimple new_stmt = NULL;
2882 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
2883 enum tree_code code;
2884 tree vectype_in;
ebfd146a
IR
2885
2886 /* Multiple types in SLP are handled by creating the appropriate number of
2887 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2888 case of SLP. */
437f4a00 2889 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
2890 ncopies = 1;
2891 else
2892 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2893
2894 gcc_assert (ncopies >= 1);
ebfd146a 2895
a70d6342 2896 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2897 return false;
2898
8644a673 2899 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2900 return false;
2901
2902 /* Is vectorizable assignment? */
2903 if (!is_gimple_assign (stmt))
2904 return false;
2905
2906 scalar_dest = gimple_assign_lhs (stmt);
2907 if (TREE_CODE (scalar_dest) != SSA_NAME)
2908 return false;
2909
fde9c428 2910 code = gimple_assign_rhs_code (stmt);
ebfd146a 2911 if (gimple_assign_single_p (stmt)
fde9c428
RG
2912 || code == PAREN_EXPR
2913 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
2914 op = gimple_assign_rhs1 (stmt);
2915 else
2916 return false;
2917
7b7ec6c5
RG
2918 if (code == VIEW_CONVERT_EXPR)
2919 op = TREE_OPERAND (op, 0);
2920
24ee1384 2921 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
fde9c428 2922 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a 2923 {
73fbfcad 2924 if (dump_enabled_p ())
78c60e3d
SS
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926 "use not simple.");
ebfd146a
IR
2927 return false;
2928 }
2929
fde9c428
RG
2930 /* We can handle NOP_EXPR conversions that do not change the number
2931 of elements or the vector size. */
7b7ec6c5
RG
2932 if ((CONVERT_EXPR_CODE_P (code)
2933 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
2934 && (!vectype_in
2935 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2936 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2937 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2938 return false;
2939
7b7b1813
RG
2940 /* We do not handle bit-precision changes. */
2941 if ((CONVERT_EXPR_CODE_P (code)
2942 || code == VIEW_CONVERT_EXPR)
2943 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2944 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2945 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2946 || ((TYPE_PRECISION (TREE_TYPE (op))
2947 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2948 /* But a conversion that does not change the bit-pattern is ok. */
2949 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2950 > TYPE_PRECISION (TREE_TYPE (op)))
2951 && TYPE_UNSIGNED (TREE_TYPE (op))))
2952 {
73fbfcad 2953 if (dump_enabled_p ())
78c60e3d
SS
2954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2955 "type conversion to/from bit-precision "
2956 "unsupported.");
7b7b1813
RG
2957 return false;
2958 }
2959
ebfd146a
IR
2960 if (!vec_stmt) /* transformation not required. */
2961 {
2962 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 2963 if (dump_enabled_p ())
78c60e3d
SS
2964 dump_printf_loc (MSG_NOTE, vect_location,
2965 "=== vectorizable_assignment ===");
c3e7ee41 2966 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
2967 return true;
2968 }
2969
2970 /** Transform. **/
73fbfcad 2971 if (dump_enabled_p ())
78c60e3d 2972 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
ebfd146a
IR
2973
2974 /* Handle def. */
2975 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2976
2977 /* Handle use. */
f18b55bd 2978 for (j = 0; j < ncopies; j++)
ebfd146a 2979 {
f18b55bd
IR
2980 /* Handle uses. */
2981 if (j == 0)
d092494c 2982 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
f18b55bd
IR
2983 else
2984 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2985
2986 /* Arguments are ready. create the new vector stmt. */
9771b263 2987 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 2988 {
7b7ec6c5
RG
2989 if (CONVERT_EXPR_CODE_P (code)
2990 || code == VIEW_CONVERT_EXPR)
4a73490d 2991 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
2992 new_stmt = gimple_build_assign (vec_dest, vop);
2993 new_temp = make_ssa_name (vec_dest, new_stmt);
2994 gimple_assign_set_lhs (new_stmt, new_temp);
2995 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2996 if (slp_node)
9771b263 2997 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 2998 }
ebfd146a
IR
2999
3000 if (slp_node)
f18b55bd
IR
3001 continue;
3002
3003 if (j == 0)
3004 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3005 else
3006 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3007
3008 prev_stmt_info = vinfo_for_stmt (new_stmt);
3009 }
b8698a0f 3010
9771b263 3011 vec_oprnds.release ();
ebfd146a
IR
3012 return true;
3013}
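
/* E.g. (sketch): a plain copy "z_1 = x_2" becomes "vect_z = vect_x",
   while a same-size conversion goes through a VIEW_CONVERT_EXPR:

     vect_z = VIEW_CONVERT_EXPR<vector(4) int>(vect_x);  */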
3014
9dc3f7de 3015
1107f3ae
IR
3016/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3017 either as shift by a scalar or by a vector. */
3018
3019bool
3020vect_supportable_shift (enum tree_code code, tree scalar_type)
3021{
3022
3023 enum machine_mode vec_mode;
3024 optab optab;
3025 int icode;
3026 tree vectype;
3027
3028 vectype = get_vectype_for_scalar_type (scalar_type);
3029 if (!vectype)
3030 return false;
3031
3032 optab = optab_for_tree_code (code, vectype, optab_scalar);
3033 if (!optab
3034 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3035 {
3036 optab = optab_for_tree_code (code, vectype, optab_vector);
3037 if (!optab
3038 || (optab_handler (optab, TYPE_MODE (vectype))
3039 == CODE_FOR_nothing))
3040 return false;
3041 }
3042
3043 vec_mode = TYPE_MODE (vectype);
3044 icode = (int) optab_handler (optab, vec_mode);
3045 if (icode == CODE_FOR_nothing)
3046 return false;
3047
3048 return true;
3049}
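
/* Usage sketch: pattern recognizers can query this before
   synthesizing shifts -- e.g. the division-by-constant pattern asks
   whether

     q_3 = x_2 >> shift_cst;

   is realizable via vect_supportable_shift (RSHIFT_EXPR, itype)
   before committing to that form.  */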
3050
3051
9dc3f7de
IR
3052/* Function vectorizable_shift.
3053
3054 Check if STMT performs a shift operation that can be vectorized.
3055 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3056 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3057 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3058
3059static bool
3060vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3061 gimple *vec_stmt, slp_tree slp_node)
3062{
3063 tree vec_dest;
3064 tree scalar_dest;
3065 tree op0, op1 = NULL;
3066 tree vec_oprnd1 = NULL_TREE;
3067 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3068 tree vectype;
3069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3070 enum tree_code code;
3071 enum machine_mode vec_mode;
3072 tree new_temp;
3073 optab optab;
3074 int icode;
3075 enum machine_mode optab_op2_mode;
3076 tree def;
3077 gimple def_stmt;
3078 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3079 gimple new_stmt = NULL;
3080 stmt_vec_info prev_stmt_info;
3081 int nunits_in;
3082 int nunits_out;
3083 tree vectype_out;
cede2577 3084 tree op1_vectype;
9dc3f7de
IR
3085 int ncopies;
3086 int j, i;
6e1aa848
DN
3087 vec<tree> vec_oprnds0 = vNULL;
3088 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
3089 tree vop0, vop1;
3090 unsigned int k;
49eab32e 3091 bool scalar_shift_arg = true;
9dc3f7de
IR
3092 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3093 int vf;
3094
3095 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3096 return false;
3097
3098 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3099 return false;
3100
3101 /* Is STMT a vectorizable binary/unary operation? */
3102 if (!is_gimple_assign (stmt))
3103 return false;
3104
3105 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3106 return false;
3107
3108 code = gimple_assign_rhs_code (stmt);
3109
3110 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3111 || code == RROTATE_EXPR))
3112 return false;
3113
3114 scalar_dest = gimple_assign_lhs (stmt);
3115 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
3116 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3117 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3118 {
73fbfcad 3119 if (dump_enabled_p ())
78c60e3d
SS
3120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3121 "bit-precision shifts not supported.");
7b7b1813
RG
3122 return false;
3123 }
9dc3f7de
IR
3124
3125 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3126 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
3127 &def_stmt, &def, &dt[0], &vectype))
3128 {
73fbfcad 3129 if (dump_enabled_p ())
78c60e3d
SS
3130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131 "use not simple.");
9dc3f7de
IR
3132 return false;
3133 }
3134 /* If op0 is an external or constant def, use a vector type with
3135 the same size as the output vector type. */
3136 if (!vectype)
3137 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3138 if (vec_stmt)
3139 gcc_assert (vectype);
3140 if (!vectype)
3141 {
73fbfcad 3142 if (dump_enabled_p ())
78c60e3d
SS
3143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3144 "no vectype for scalar type ");
9dc3f7de
IR
3145 return false;
3146 }
3147
3148 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3149 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3150 if (nunits_out != nunits_in)
3151 return false;
3152
3153 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3154 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3155 &def, &dt[1], &op1_vectype))
9dc3f7de 3156 {
73fbfcad 3157 if (dump_enabled_p ())
78c60e3d
SS
3158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3159 "use not simple.");
9dc3f7de
IR
3160 return false;
3161 }
3162
3163 if (loop_vinfo)
3164 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3165 else
3166 vf = 1;
3167
3168 /* Multiple types in SLP are handled by creating the appropriate number of
3169 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3170 case of SLP. */
437f4a00 3171 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
3172 ncopies = 1;
3173 else
3174 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3175
3176 gcc_assert (ncopies >= 1);
3177
3178 /* Determine whether the shift amount is a vector or a scalar. If the
3179 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3180
49eab32e
JJ
3181 if (dt[1] == vect_internal_def && !slp_node)
3182 scalar_shift_arg = false;
3183 else if (dt[1] == vect_constant_def
3184 || dt[1] == vect_external_def
3185 || dt[1] == vect_internal_def)
3186 {
3187 /* In SLP we need to check that the shift count is the same in all
3188 stmts; in loops, if it is a constant or invariant, it is always
3189 a scalar shift. */
3190 if (slp_node)
3191 {
9771b263 3192 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
3193 gimple slpstmt;
3194
9771b263 3195 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
3196 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3197 scalar_shift_arg = false;
3198 }
3199 }
3200 else
3201 {
73fbfcad 3202 if (dump_enabled_p ())
78c60e3d
SS
3203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3204 "operand mode requires invariant argument.");
49eab32e
JJ
3205 return false;
3206 }
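
 /* E.g.: in "a[i] = b[i] << c[i]" the count is a loop def, so only
    the vector/vector optab applies; in "a[i] = b[i] << n" the count
    is invariant and the vector/scalar optab is tried first below.  */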

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }
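
  /* Worked example of the conversion above (illustrative types): for a
     V8HI shift

         short a[8], b[8];
         a[i] = b[i] << k;        // k has type int in GIMPLE

     the rhs2 'k' is an int while the vector elements are short, so for
     the vector/vector optab the count is fold_convert-ed to the element
     type and, at transform time, broadcast by vect_init_vector into a
     vector of shorts.  */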

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
    }
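
  /* Note on the word-mode fallback above (illustrative, target-dependent):
     if the target has no vector shift pattern for VEC_MODE but the whole
     vector is exactly one word wide (e.g. a V2HI vector on a hypothetical
     32-bit target), the operation may still proceed on plain word_mode,
     subject to the vect_min_worthwhile_factor profitability check below;
     for shifts that factor is rarely met, so this path is seldom taken.  */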

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary, and ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are the bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.");
      return false;
    }
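
  /* Illustration of the bit-precision restriction above (hypothetical
     type): for a bit-field based type such as

         struct { int x : 3; } s;   // 3-bit precision, QImode storage

     TYPE_PRECISION (3) differs from the mode precision (8), so e.g.
     s.x + 1 would need an extra truncation after the vector add to keep
     the value within 3 bits; only the bitwise AND/IOR/XOR results stay
     representable without one, which is why just those codes are
     exempted.  */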

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }
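
  /* Illustration of the MULT_HIGHPART_EXPR case above: a highpart
     multiply yields the upper half of the widened product, as in the
     scalar idiom

         int hi = ((long long) a * b) >> 32;    // assuming 32-bit int

     can_mult_highpart_p tests whether the target can do this per element
     (directly, or via widening multiplies plus a permute), so there is no
     single optab to query and ICODE is set to the sentinel value
     LAST_INSN_CODE to mean "supported".  */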

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step 2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}


/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                            size_zero_node) < 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step for store.");
      return false;
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from the base address (the access
     of the first stmt in the chain, S2 in the above example) when the last
     store stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;
      gimple ptr_incr;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next
                 copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 build_int_cst (reference_alias_ptr_type
                                                (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                      misalign);
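
              /* Summary of the (ALIGN, MISALIGN) encoding above
                 (illustrative, assuming a 16-byte-aligned V4SF vectype):
                 a known-aligned access records full vector alignment with
                 misalignment 0; an unknown misalignment
                 (DR_MISALIGNMENT == -1) can only promise element
                 alignment; a known offset keeps vector alignment plus the
                 byte offset, e.g. an access 4 bytes off a 16-byte
                 boundary gets align 16, misalign 4.  */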

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
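
/* For example (illustrative): for a V4SI vectype and SEL = {3, 2, 1, 0},
   vect_gen_perm_mask returns the VECTOR_CST {3, 2, 1, 0} in a same-sized
   integer mask vector type, provided can_vec_perm_p confirms the target
   can permute V4SI this way; perm_mask_for_reverse below is exactly this
   use of the routine.  */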

/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                            x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.");
          return false;
        }

      if (negative)
        {
          gcc_assert (!grouped_load);
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported.");
              return false;
            }
        }
    }
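
  /* Illustration of the negative-step case checked above (hypothetical
     loop):

         for (i = n - 1; i >= 0; i--)
           ... = a[i];            // DR_STEP is -4 for int elements

     The loads are still emitted forwards, vector by vector, and each
     loaded vector is then reversed with a VEC_PERM_EXPR built from
     perm_mask_for_reverse, which is why both unaligned support and a
     reversing permutation must be available.  */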

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d", ncopies);

  /** Transform.  **/

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
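
      /* Illustrative element counts for the three cases above (chosen
         for exposition): if both the data vectype and the index vectype
         have 4 elements, MODIFIER == NONE and each gather call yields one
         data vector.  If the data vectype has 2 elements but the index
         vectype has 4 (nunits == gather_off_nunits / 2), WIDEN builds
         SEL = {2, 3, 2, 3} so odd copies reuse the upper half of the
         index vector.  If the data vectype has 4 elements but the index
         vectype only 2 (nunits == gather_off_nunits * 2), NARROW doubles
         NCOPIES and SEL = {0, 1, 4, 5} glues each pair of gather results
         into one data vector.  */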

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
      mask = build_vector_from_val (masktype, mask);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
       */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
                                                   running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
aec7ae7d 4744
0d0293ac 4745 if (grouped_load)
ebfd146a 4746 {
e14c1050 4747 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4748 if (slp
9771b263
DN
4749 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
4750 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4751 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4752
ebfd146a
IR
4753 /* Check if the chain of loads is already vectorized. */
4754 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4755 {
4756 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4757 return true;
4758 }
4759 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4760 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
4761
4762 /* VEC_NUM is the number of vect stmts to be created for this group. */
4763 if (slp)
4764 {
0d0293ac 4765 grouped_load = false;
ebfd146a 4766 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 4767 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
a70d6342
IR
4768 slp_perm = true;
4769 }
ebfd146a
IR
4770 else
4771 vec_num = group_size;
ebfd146a
IR
4772 }
4773 else
4774 {
4775 first_stmt = stmt;
4776 first_dr = dr;
4777 group_size = vec_num = 1;
4778 }
4779
720f5239 4780 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4781 gcc_assert (alignment_support_scheme);
272c6793
RS
4782 /* Targets with load-lane instructions must not require explicit
4783 realignment. */
4784 gcc_assert (!load_lanes_p
4785 || alignment_support_scheme == dr_aligned
4786 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
4787
4788 /* In case the vectorization factor (VF) is bigger than the number
4789 of elements that we can fit in a vectype (nunits), we have to generate
4790 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 4791 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4792 from one copy of the vector stmt to the next, in the field
ff802fa1 4793 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4794 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
4795 stmts that use the defs of the current stmt. The example below
4796 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4797 need to create 4 vectorized stmts):
ebfd146a
IR
4798
4799 before vectorization:
4800 RELATED_STMT VEC_STMT
4801 S1: x = memref - -
4802 S2: z = x + 1 - -
4803
4804 step 1: vectorize stmt S1:
4805 We first create the vector stmt VS1_0, and, as usual, record a
4806 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4807 Next, we create the vector stmt VS1_1, and record a pointer to
4808 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4809 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
4810 stmts and pointers:
4811 RELATED_STMT VEC_STMT
4812 VS1_0: vx0 = memref0 VS1_1 -
4813 VS1_1: vx1 = memref1 VS1_2 -
4814 VS1_2: vx2 = memref2 VS1_3 -
4815 VS1_3: vx3 = memref3 - -
4816 S1: x = load - VS1_0
4817 S2: z = x + 1 - -
4818
b8698a0f
L
4819 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4820 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
4821 stmt S2. */
4822
0d0293ac 4823 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4824
4825 S1: x2 = &base + 2
4826 S2: x0 = &base
4827 S3: x1 = &base + 1
4828 S4: x3 = &base + 3
4829
b8698a0f 4830 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4831 starting from the access of the first stmt of the chain:
4832
4833 VS1: vx0 = &base
4834 VS2: vx1 = &base + vec_size*1
4835 VS3: vx2 = &base + vec_size*2
4836 VS4: vx3 = &base + vec_size*3
4837
4838 Then permutation statements are generated:
4839
e2c83630
RH
4840 VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4841 VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
4842 ...
4843
4844 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4845 (the order of the data-refs in the output of vect_permute_load_chain
4846 corresponds to the order of scalar stmts in the interleaving chain - see
4847 the documentation of vect_permute_load_chain()).
4848 The generation of permutation stmts and recording them in
0d0293ac 4849 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4850
b8698a0f 4851 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
4852 permutation stmts above are created for every copy. The result vector
4853 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4854 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
4855
4856 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4857 on a target that supports unaligned accesses (dr_unaligned_supported)
4858 we generate the following code:
4859 p = initial_addr;
4860 indx = 0;
4861 loop {
4862 p = p + indx * vectype_size;
4863 vec_dest = *(p);
4864 indx = indx + 1;
4865 }
4866
4867 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4868 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
4869 then generate the following code, in which the data in each iteration is
4870 obtained by two vector loads, one from the previous iteration, and one
4871 from the current iteration:
4872 p1 = initial_addr;
4873 msq_init = *(floor(p1))
4874 p2 = initial_addr + VS - 1;
4875 realignment_token = call target_builtin;
4876 indx = 0;
4877 loop {
4878 p2 = p2 + indx * vectype_size
4879 lsq = *(floor(p2))
4880 vec_dest = realign_load (msq, lsq, realignment_token)
4881 indx = indx + 1;
4882 msq = lsq;
4883 } */
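   /* As a concrete sketch (assuming 16-byte vectors and an address p
      misaligned by 4): msq = *(p & -16) holds bytes [p-4, p+12),
      lsq = *((p + 15) & -16) holds bytes [p+12, p+28), and
      realign_load shifts/merges the two aligned loads so that vec_dest
      receives exactly bytes [p, p+16).  */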
4884
4885 /* If the misalignment remains the same throughout the execution of the
4886 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 4887 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
4888 This can only occur when vectorizing memory accesses in the inner-loop
4889 nested within an outer-loop that is being vectorized. */
4890
d1e4b493 4891 if (nested_in_vect_loop
211bea38 4892 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
4893 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4894 {
4895 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4896 compute_in_loop = true;
4897 }
4898
4899 if ((alignment_support_scheme == dr_explicit_realign_optimized
4900 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 4901 && !compute_in_loop)
ebfd146a
IR
4902 {
4903 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4904 alignment_support_scheme, NULL_TREE,
4905 &at_loop);
4906 if (alignment_support_scheme == dr_explicit_realign_optimized)
4907 {
4908 phi = SSA_NAME_DEF_STMT (msq);
4909 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4910 }
4911 }
4912 else
4913 at_loop = loop;
4914
a1e53f3f
L
4915 if (negative)
4916 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4917
272c6793
RS
4918 if (load_lanes_p)
4919 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4920 else
4921 aggr_type = vectype;
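  /* E.g. (a sketch): for a group of two V4SI loads handled via
     load-lanes, AGGR_TYPE is int[8], so a single IFN_LOAD_LANES call
     below fills both vectors at once.  */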
4922
ebfd146a
IR
4923 prev_stmt_info = NULL;
4924 for (j = 0; j < ncopies; j++)
b8698a0f 4925 {
272c6793 4926 /* 1. Create the vector or array pointer update chain. */
ebfd146a 4927 if (j == 0)
272c6793 4928 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
920e8172
RS
4929 offset, &dummy, gsi,
4930 &ptr_incr, false, &inv_p);
ebfd146a 4931 else
272c6793
RS
4932 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4933 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 4934
0d0293ac 4935 if (grouped_load || slp_perm)
9771b263 4936 dr_chain.create (vec_num);
5ce1ee7f 4937
272c6793 4938 if (load_lanes_p)
ebfd146a 4939 {
272c6793
RS
4940 tree vec_array;
4941
4942 vec_array = create_vector_array (vectype, vec_num);
4943
4944 /* Emit:
4945 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4946 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4947 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4948 gimple_call_set_lhs (new_stmt, vec_array);
4949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 4950
272c6793
RS
4951 /* Extract each vector into an SSA_NAME. */
4952 for (i = 0; i < vec_num; i++)
ebfd146a 4953 {
272c6793
RS
4954 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4955 vec_array, i);
9771b263 4956 dr_chain.quick_push (new_temp);
272c6793
RS
4957 }
4958
4959 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 4960 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
4961 }
4962 else
4963 {
4964 for (i = 0; i < vec_num; i++)
4965 {
4966 if (i > 0)
4967 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4968 stmt, NULL_TREE);
4969
4970 /* 2. Create the vector-load in the loop. */
4971 switch (alignment_support_scheme)
4972 {
4973 case dr_aligned:
4974 case dr_unaligned_supported:
be1ac4ec 4975 {
644ffefd
MJ
4976 unsigned int align, misalign;
4977
272c6793
RS
4978 data_ref
4979 = build2 (MEM_REF, vectype, dataref_ptr,
4980 build_int_cst (reference_alias_ptr_type
4981 (DR_REF (first_dr)), 0));
644ffefd 4982 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
4983 if (alignment_support_scheme == dr_aligned)
4984 {
4985 gcc_assert (aligned_access_p (first_dr));
644ffefd 4986 misalign = 0;
272c6793
RS
4987 }
4988 else if (DR_MISALIGNMENT (first_dr) == -1)
4989 {
4990 TREE_TYPE (data_ref)
4991 = build_aligned_type (TREE_TYPE (data_ref),
4992 TYPE_ALIGN (elem_type));
644ffefd
MJ
4993 align = TYPE_ALIGN_UNIT (elem_type);
4994 misalign = 0;
272c6793
RS
4995 }
4996 else
4997 {
4998 TREE_TYPE (data_ref)
4999 = build_aligned_type (TREE_TYPE (data_ref),
5000 TYPE_ALIGN (elem_type));
644ffefd 5001 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5002 }
644ffefd
MJ
5003 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5004 align, misalign);
272c6793 5005 break;
be1ac4ec 5006 }
272c6793 5007 case dr_explicit_realign:
267d3070 5008 {
272c6793
RS
5009 tree ptr, bump;
5010 tree vs_minus_1;
5011
5012 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5013
5014 if (compute_in_loop)
5015 msq = vect_setup_realignment (first_stmt, gsi,
5016 &realignment_token,
5017 dr_explicit_realign,
5018 dataref_ptr, NULL);
5019
070ecdfd 5020 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5021 new_stmt = gimple_build_assign_with_ops
070ecdfd 5022 (BIT_AND_EXPR, ptr, dataref_ptr,
272c6793
RS
5023 build_int_cst
5024 (TREE_TYPE (dataref_ptr),
5025 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5026 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5027 data_ref
5028 = build2 (MEM_REF, vectype, ptr,
5029 build_int_cst (reference_alias_ptr_type
5030 (DR_REF (first_dr)), 0));
5031 vec_dest = vect_create_destination_var (scalar_dest,
5032 vectype);
5033 new_stmt = gimple_build_assign (vec_dest, data_ref);
5034 new_temp = make_ssa_name (vec_dest, new_stmt);
5035 gimple_assign_set_lhs (new_stmt, new_temp);
5036 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5037 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5039 msq = new_temp;
5040
5041 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5042 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
5043 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5044 new_stmt = gimple_build_assign_with_ops
5045 (BIT_AND_EXPR, NULL_TREE, ptr,
5046 build_int_cst
5047 (TREE_TYPE (ptr),
5048 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5049 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
5050 gimple_assign_set_lhs (new_stmt, ptr);
5051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5052 data_ref
5053 = build2 (MEM_REF, vectype, ptr,
5054 build_int_cst (reference_alias_ptr_type
5055 (DR_REF (first_dr)), 0));
5056 break;
267d3070 5057 }
272c6793 5058 case dr_explicit_realign_optimized:
070ecdfd 5059 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5060 new_stmt = gimple_build_assign_with_ops
070ecdfd 5061 (BIT_AND_EXPR, new_temp, dataref_ptr,
272c6793
RS
5062 build_int_cst
5063 (TREE_TYPE (dataref_ptr),
5064 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5066 data_ref
5067 = build2 (MEM_REF, vectype, new_temp,
5068 build_int_cst (reference_alias_ptr_type
5069 (DR_REF (first_dr)), 0));
5070 break;
5071 default:
5072 gcc_unreachable ();
5073 }
ebfd146a 5074 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5075 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
5076 new_temp = make_ssa_name (vec_dest, new_stmt);
5077 gimple_assign_set_lhs (new_stmt, new_temp);
5078 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5079
272c6793
RS
5080 /* 3. Handle explicit realignment if necessary/supported.
5081 Create in loop:
5082 vec_dest = realign_load (msq, lsq, realignment_token) */
5083 if (alignment_support_scheme == dr_explicit_realign_optimized
5084 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5085 {
272c6793
RS
5086 lsq = gimple_assign_lhs (new_stmt);
5087 if (!realignment_token)
5088 realignment_token = dataref_ptr;
5089 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5090 new_stmt
73804b12
RG
5091 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5092 vec_dest, msq, lsq,
5093 realignment_token);
272c6793
RS
5094 new_temp = make_ssa_name (vec_dest, new_stmt);
5095 gimple_assign_set_lhs (new_stmt, new_temp);
5096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5097
5098 if (alignment_support_scheme == dr_explicit_realign_optimized)
5099 {
5100 gcc_assert (phi);
5101 if (i == vec_num - 1 && j == ncopies - 1)
5102 add_phi_arg (phi, lsq,
5103 loop_latch_edge (containing_loop),
9e227d60 5104 UNKNOWN_LOCATION);
272c6793
RS
5105 msq = lsq;
5106 }
ebfd146a 5107 }
ebfd146a 5108
59fd17e3
RB
5109 /* 4. Handle invariant-load. */
5110 if (inv_p && !bb_vinfo)
5111 {
5112 gimple_stmt_iterator gsi2 = *gsi;
5113 gcc_assert (!grouped_load);
5114 gsi_next (&gsi2);
5115 new_temp = vect_init_vector (stmt, scalar_dest,
5116 vectype, &gsi2);
5117 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5118 }
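          /* I.e. (a sketch): the invariant scalar is broadcast once after
             STMT, roughly vect_cst = {x, x, x, x}, instead of being
             re-loaded in every vector iteration.  */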
5119
272c6793
RS
5120 if (negative)
5121 {
aec7ae7d
JJ
5122 tree perm_mask = perm_mask_for_reverse (vectype);
5123 new_temp = permute_vec_elements (new_temp, new_temp,
5124 perm_mask, stmt, gsi);
ebfd146a
IR
5125 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5126 }
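          /* E.g. (a sketch, for nunits == 4): a negative-step access loads
             the vector at the adjusted offset and reverses it with
             VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>.  */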
267d3070 5127
272c6793 5128 /* Collect vector loads and later create their permutation in
0d0293ac
MM
5129 vect_transform_grouped_load (). */
5130 if (grouped_load || slp_perm)
9771b263 5131 dr_chain.quick_push (new_temp);
267d3070 5132
272c6793
RS
5133 /* Store vector loads in the corresponding SLP_NODE. */
5134 if (slp && !slp_perm)
9771b263 5135 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5136 }
ebfd146a
IR
5137 }
5138
5139 if (slp && !slp_perm)
5140 continue;
5141
5142 if (slp_perm)
5143 {
a70d6342 5144 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
ebfd146a
IR
5145 slp_node_instance, false))
5146 {
9771b263 5147 dr_chain.release ();
ebfd146a
IR
5148 return false;
5149 }
5150 }
5151 else
5152 {
0d0293ac 5153 if (grouped_load)
ebfd146a 5154 {
272c6793 5155 if (!load_lanes_p)
0d0293ac 5156 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5157 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5158 }
5159 else
5160 {
5161 if (j == 0)
5162 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5163 else
5164 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5165 prev_stmt_info = vinfo_for_stmt (new_stmt);
5166 }
5167 }
9771b263 5168 dr_chain.release ();
ebfd146a
IR
5169 }
5170
ebfd146a
IR
5171 return true;
5172}
5173
5174/* Function vect_is_simple_cond.
b8698a0f 5175
ebfd146a
IR
5176 Input:
5177 LOOP_VINFO - the vect info of the loop that is being vectorized.
5178 COND - Condition that is checked for simple use.
5179
e9e1d143
RG
5180 Output:
5181 *COMP_VECTYPE - the vector type for the comparison.
5182
ebfd146a
IR
5183 Returns whether a COND can be vectorized. Checks whether
5184 condition operands are supportable using vect_is_simple_use_1. */
5185
87aab9b2 5186static bool
24ee1384
IR
5187vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5188 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
5189{
5190 tree lhs, rhs;
5191 tree def;
5192 enum vect_def_type dt;
e9e1d143 5193 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
5194
5195 if (!COMPARISON_CLASS_P (cond))
5196 return false;
5197
5198 lhs = TREE_OPERAND (cond, 0);
5199 rhs = TREE_OPERAND (cond, 1);
5200
5201 if (TREE_CODE (lhs) == SSA_NAME)
5202 {
5203 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
5204 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5205 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
5206 return false;
5207 }
5208 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5209 && TREE_CODE (lhs) != FIXED_CST)
5210 return false;
5211
5212 if (TREE_CODE (rhs) == SSA_NAME)
5213 {
5214 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
5215 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5216 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
5217 return false;
5218 }
f7e531cf 5219 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
5220 && TREE_CODE (rhs) != FIXED_CST)
5221 return false;
5222
e9e1d143 5223 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
5224 return true;
5225}
5226
5227/* vectorizable_condition.
5228
b8698a0f
L
5229 Check if STMT is a conditional modify expression that can be vectorized.
5230 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5231 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
5232 at GSI.
5233
5234 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5235 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5236 the else clause if it is 2).
ebfd146a
IR
5237
5238 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
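/* For example (an illustrative sketch; the operand names are made up):
   the scalar statement

       x = a < b ? c : d;

   becomes

       vec_cmp = va < vb;
       vx = VEC_COND_EXPR <vec_cmp, vc, vd>;

   with the comparison built in VEC_CMP_TYPE and the selection in
   VECTYPE.  */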
5239
4bbe8262 5240bool
ebfd146a 5241vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
5242 gimple *vec_stmt, tree reduc_def, int reduc_index,
5243 slp_tree slp_node)
ebfd146a
IR
5244{
5245 tree scalar_dest = NULL_TREE;
5246 tree vec_dest = NULL_TREE;
ebfd146a
IR
5247 tree cond_expr, then_clause, else_clause;
5248 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5250 tree comp_vectype = NULL_TREE;
ff802fa1
IR
5251 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5252 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
5253 tree vec_compare, vec_cond_expr;
5254 tree new_temp;
5255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5256 tree def;
a855b1b1 5257 enum vect_def_type dt, dts[4];
ebfd146a 5258 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5259 int ncopies;
ebfd146a 5260 enum tree_code code;
a855b1b1 5261 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
5262 int i, j;
5263 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
5264 vec<tree> vec_oprnds0 = vNULL;
5265 vec<tree> vec_oprnds1 = vNULL;
5266 vec<tree> vec_oprnds2 = vNULL;
5267 vec<tree> vec_oprnds3 = vNULL;
784fb9b3 5268 tree vec_cmp_type = vectype;
b8698a0f 5269
f7e531cf
IR
5270 if (slp_node || PURE_SLP_STMT (stmt_info))
5271 ncopies = 1;
5272 else
5273 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5274
ebfd146a 5275 gcc_assert (ncopies >= 1);
a855b1b1 5276 if (reduc_index && ncopies > 1)
ebfd146a
IR
5277 return false; /* FORNOW */
5278
f7e531cf
IR
5279 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5280 return false;
5281
5282 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5283 return false;
5284
4bbe8262
IR
5285 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5286 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5287 && reduc_def))
ebfd146a
IR
5288 return false;
5289
ebfd146a 5290 /* FORNOW: not yet supported. */
b8698a0f 5291 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5292 {
73fbfcad 5293 if (dump_enabled_p ())
78c60e3d
SS
5294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5295 "value used after loop.");
ebfd146a
IR
5296 return false;
5297 }
5298
5299 /* Is vectorizable conditional operation? */
5300 if (!is_gimple_assign (stmt))
5301 return false;
5302
5303 code = gimple_assign_rhs_code (stmt);
5304
5305 if (code != COND_EXPR)
5306 return false;
5307
4e71066d
RG
5308 cond_expr = gimple_assign_rhs1 (stmt);
5309 then_clause = gimple_assign_rhs2 (stmt);
5310 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5311
24ee1384
IR
5312 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5313 &comp_vectype)
e9e1d143 5314 || !comp_vectype)
ebfd146a
IR
5315 return false;
5316
5317 if (TREE_CODE (then_clause) == SSA_NAME)
5318 {
5319 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5320 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5321 &then_def_stmt, &def, &dt))
5322 return false;
5323 }
b8698a0f 5324 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
5325 && TREE_CODE (then_clause) != REAL_CST
5326 && TREE_CODE (then_clause) != FIXED_CST)
5327 return false;
5328
5329 if (TREE_CODE (else_clause) == SSA_NAME)
5330 {
5331 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5332 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5333 &else_def_stmt, &def, &dt))
5334 return false;
5335 }
b8698a0f 5336 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
5337 && TREE_CODE (else_clause) != REAL_CST
5338 && TREE_CODE (else_clause) != FIXED_CST)
5339 return false;
5340
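  /* E.g. (a sketch): for a float VECTYPE such as V4SF, the comparison
     below must be carried out in a same-sized integer vector type
     (e.g. V4SI), since the compare produces a bit mask, not floats.  */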
784fb9b3
JJ
5341 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
5342 {
5343 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5344 tree cmp_type = build_nonstandard_integer_type (prec, 1);
5345 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5346 if (vec_cmp_type == NULL_TREE)
5347 return false;
5348 }
5349
b8698a0f 5350 if (!vec_stmt)
ebfd146a
IR
5351 {
5352 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5353 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5354 }
5355
f7e531cf
IR
5356 /* Transform. */
5357
5358 if (!slp_node)
5359 {
9771b263
DN
5360 vec_oprnds0.create (1);
5361 vec_oprnds1.create (1);
5362 vec_oprnds2.create (1);
5363 vec_oprnds3.create (1);
f7e531cf 5364 }
ebfd146a
IR
5365
5366 /* Handle def. */
5367 scalar_dest = gimple_assign_lhs (stmt);
5368 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5369
5370 /* Handle cond expr. */
a855b1b1
MM
5371 for (j = 0; j < ncopies; j++)
5372 {
f7e531cf 5373 gimple new_stmt = NULL;
a855b1b1
MM
5374 if (j == 0)
5375 {
f7e531cf
IR
5376 if (slp_node)
5377 {
9771b263
DN
5378 vec<tree> ops;
5379 ops.create (4);
37b5ec8f 5380 vec<vec<tree> > vec_defs;
9771b263
DN
5381
5382 vec_defs.create (4);
5383 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5384 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5385 ops.safe_push (then_clause);
5386 ops.safe_push (else_clause);
f7e531cf 5387 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
37b5ec8f
JJ
5388 vec_oprnds3 = vec_defs.pop ();
5389 vec_oprnds2 = vec_defs.pop ();
5390 vec_oprnds1 = vec_defs.pop ();
5391 vec_oprnds0 = vec_defs.pop ();
f7e531cf 5392
9771b263
DN
5393 ops.release ();
5394 vec_defs.release ();
f7e531cf
IR
5395 }
5396 else
5397 {
5398 gimple gtemp;
5399 vec_cond_lhs =
a855b1b1
MM
5400 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5401 stmt, NULL);
24ee1384
IR
5402 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5403 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
5404
5405 vec_cond_rhs =
5406 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5407 stmt, NULL);
24ee1384
IR
5408 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5409 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
5410 if (reduc_index == 1)
5411 vec_then_clause = reduc_def;
5412 else
5413 {
5414 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5415 stmt, NULL);
24ee1384 5416 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
5417 NULL, &gtemp, &def, &dts[2]);
5418 }
5419 if (reduc_index == 2)
5420 vec_else_clause = reduc_def;
5421 else
5422 {
5423 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5424 stmt, NULL);
24ee1384 5425 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5426 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5427 }
a855b1b1
MM
5428 }
5429 }
5430 else
5431 {
f7e531cf 5432 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5433 vec_oprnds0.pop ());
f7e531cf 5434 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5435 vec_oprnds1.pop ());
a855b1b1 5436 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5437 vec_oprnds2.pop ());
a855b1b1 5438 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5439 vec_oprnds3.pop ());
f7e531cf
IR
5440 }
5441
5442 if (!slp_node)
5443 {
9771b263
DN
5444 vec_oprnds0.quick_push (vec_cond_lhs);
5445 vec_oprnds1.quick_push (vec_cond_rhs);
5446 vec_oprnds2.quick_push (vec_then_clause);
5447 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
5448 }
5449
9dc3f7de 5450 /* Arguments are ready. Create the new vector stmt. */
9771b263 5451 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5452 {
9771b263
DN
5453 vec_cond_rhs = vec_oprnds1[i];
5454 vec_then_clause = vec_oprnds2[i];
5455 vec_else_clause = vec_oprnds3[i];
a855b1b1 5456
784fb9b3
JJ
5457 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5458 vec_cond_lhs, vec_cond_rhs);
f7e531cf
IR
5459 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5460 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5461
f7e531cf
IR
5462 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5463 new_temp = make_ssa_name (vec_dest, new_stmt);
5464 gimple_assign_set_lhs (new_stmt, new_temp);
5465 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5466 if (slp_node)
9771b263 5467 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
5468 }
5469
5470 if (slp_node)
5471 continue;
5472
5473 if (j == 0)
5474 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5475 else
5476 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5477
5478 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5479 }
b8698a0f 5480
9771b263
DN
5481 vec_oprnds0.release ();
5482 vec_oprnds1.release ();
5483 vec_oprnds2.release ();
5484 vec_oprnds3.release ();
f7e531cf 5485
ebfd146a
IR
5486 return true;
5487}
5488
5489
8644a673 5490/* Make sure the statement is vectorizable. */
ebfd146a
IR
5491
5492bool
a70d6342 5493vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5494{
8644a673 5495 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5496 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5497 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5498 bool ok;
a70d6342 5499 tree scalar_type, vectype;
363477c0
JJ
5500 gimple pattern_stmt;
5501 gimple_seq pattern_def_seq;
ebfd146a 5502
73fbfcad 5503 if (dump_enabled_p ())
ebfd146a 5504 {
78c60e3d
SS
5505 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5506 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 5507 }
ebfd146a 5508
1825a1f3 5509 if (gimple_has_volatile_ops (stmt))
b8698a0f 5510 {
73fbfcad 5511 if (dump_enabled_p ())
78c60e3d
SS
5512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5513 "not vectorized: stmt has volatile operands");
1825a1f3
IR
5514
5515 return false;
5516 }
b8698a0f
L
5517
5518 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
5519 to include:
5520 - the COND_EXPR which is the loop exit condition
5521 - any LABEL_EXPRs in the loop
b8698a0f 5522 - computations that are used only for array indexing or loop control.
8644a673 5523 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5524 instance, therefore, all the statements are relevant.
ebfd146a 5525
d092494c 5526 Pattern statement needs to be analyzed instead of the original statement
83197f37 5527 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
5528 statements. In basic blocks we are called from some SLP instance
5529 traversal; there we don't analyze the pattern stmts separately, since
5530 they will already be part of the SLP instance. */
83197f37
IR
5531
5532 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5533 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5534 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5535 {
9d5e7640 5536 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5537 && pattern_stmt
9d5e7640
IR
5538 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5539 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5540 {
83197f37 5541 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
5542 stmt = pattern_stmt;
5543 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5544 if (dump_enabled_p ())
9d5e7640 5545 {
78c60e3d
SS
5546 dump_printf_loc (MSG_NOTE, vect_location,
5547 "==> examining pattern statement: ");
5548 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
5549 }
5550 }
5551 else
5552 {
73fbfcad 5553 if (dump_enabled_p ())
78c60e3d 5554 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
ebfd146a 5555
9d5e7640
IR
5556 return true;
5557 }
8644a673 5558 }
83197f37 5559 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5560 && node == NULL
83197f37
IR
5561 && pattern_stmt
5562 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5563 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5564 {
5565 /* Analyze PATTERN_STMT too. */
73fbfcad 5566 if (dump_enabled_p ())
83197f37 5567 {
78c60e3d
SS
5568 dump_printf_loc (MSG_NOTE, vect_location,
5569 "==> examining pattern statement: ");
5570 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
5571 }
5572
5573 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5574 return false;
5575 }
ebfd146a 5576
1107f3ae 5577 if (is_pattern_stmt_p (stmt_info)
079c527f 5578 && node == NULL
363477c0 5579 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5580 {
363477c0 5581 gimple_stmt_iterator si;
1107f3ae 5582
363477c0
JJ
5583 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5584 {
5585 gimple pattern_def_stmt = gsi_stmt (si);
5586 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5587 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5588 {
5589 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5590 if (dump_enabled_p ())
363477c0 5591 {
78c60e3d
SS
5592 dump_printf_loc (MSG_NOTE, vect_location,
5593 "==> examining pattern def statement: ");
5594 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 5595 }
1107f3ae 5596
363477c0
JJ
5597 if (!vect_analyze_stmt (pattern_def_stmt,
5598 need_to_vectorize, node))
5599 return false;
5600 }
5601 }
5602 }
1107f3ae 5603
8644a673
IR
5604 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5605 {
5606 case vect_internal_def:
5607 break;
ebfd146a 5608
8644a673 5609 case vect_reduction_def:
7c5222ff 5610 case vect_nested_cycle:
a70d6342 5611 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5612 || relevance == vect_used_in_outer_by_reduction
a70d6342 5613 || relevance == vect_unused_in_scope));
8644a673
IR
5614 break;
5615
5616 case vect_induction_def:
5617 case vect_constant_def:
5618 case vect_external_def:
5619 case vect_unknown_def_type:
5620 default:
5621 gcc_unreachable ();
5622 }
ebfd146a 5623
a70d6342
IR
5624 if (bb_vinfo)
5625 {
5626 gcc_assert (PURE_SLP_STMT (stmt_info));
5627
b690cc0f 5628 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5629 if (dump_enabled_p ())
a70d6342 5630 {
78c60e3d
SS
5631 dump_printf_loc (MSG_NOTE, vect_location,
5632 "get vectype for scalar type: ");
5633 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
a70d6342
IR
5634 }
5635
5636 vectype = get_vectype_for_scalar_type (scalar_type);
5637 if (!vectype)
5638 {
73fbfcad 5639 if (dump_enabled_p ())
a70d6342 5640 {
78c60e3d
SS
5641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5642 "not SLPed: unsupported data-type ");
5643 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5644 scalar_type);
a70d6342
IR
5645 }
5646 return false;
5647 }
5648
73fbfcad 5649 if (dump_enabled_p ())
a70d6342 5650 {
78c60e3d
SS
5651 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5652 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
a70d6342
IR
5653 }
5654
5655 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5656 }
5657
8644a673 5658 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5659 {
8644a673
IR
5660 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5661 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5662 *need_to_vectorize = true;
ebfd146a
IR
5663 }
5664
8644a673 5665 ok = true;
b8698a0f 5666 if (!bb_vinfo
a70d6342
IR
5667 && (STMT_VINFO_RELEVANT_P (stmt_info)
5668 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5669 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5670 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
5671 || vectorizable_operation (stmt, NULL, NULL, NULL)
5672 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5673 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5674 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5675 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5676 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5677 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
5678 else
5679 {
5680 if (bb_vinfo)
4a00c761
JJ
5681 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5682 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5683 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
5684 || vectorizable_assignment (stmt, NULL, NULL, node)
5685 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5686 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
5687 || vectorizable_store (stmt, NULL, NULL, node)
5688 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5689 }
8644a673
IR
5690
5691 if (!ok)
ebfd146a 5692 {
73fbfcad 5693 if (dump_enabled_p ())
8644a673 5694 {
78c60e3d
SS
5695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5696 "not vectorized: relevant stmt not ");
5697 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5698 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5699 }
b8698a0f 5700
ebfd146a
IR
5701 return false;
5702 }
5703
a70d6342
IR
5704 if (bb_vinfo)
5705 return true;
5706
8644a673
IR
5707 /* Stmts that are (also) "live" (i.e., used outside the loop)
5708 need extra handling, except for vectorizable reductions. */
5709 if (STMT_VINFO_LIVE_P (stmt_info)
5710 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5711 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5712
8644a673 5713 if (!ok)
ebfd146a 5714 {
73fbfcad 5715 if (dump_enabled_p ())
8644a673 5716 {
78c60e3d
SS
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "not vectorized: live stmt not ");
5719 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5720 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5721 }
b8698a0f 5722
8644a673 5723 return false;
ebfd146a
IR
5724 }
5725
ebfd146a
IR
5726 return true;
5727}
5728
5729
5730/* Function vect_transform_stmt.
5731
5732 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5733
5734bool
5735vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5736 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
5737 slp_instance slp_node_instance)
5738{
5739 bool is_store = false;
5740 gimple vec_stmt = NULL;
5741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5742 bool done;
ebfd146a
IR
5743
5744 switch (STMT_VINFO_TYPE (stmt_info))
5745 {
5746 case type_demotion_vec_info_type:
ebfd146a 5747 case type_promotion_vec_info_type:
ebfd146a
IR
5748 case type_conversion_vec_info_type:
5749 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5750 gcc_assert (done);
5751 break;
5752
5753 case induc_vec_info_type:
5754 gcc_assert (!slp_node);
5755 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5756 gcc_assert (done);
5757 break;
5758
9dc3f7de
IR
5759 case shift_vec_info_type:
5760 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5761 gcc_assert (done);
5762 break;
5763
ebfd146a
IR
5764 case op_vec_info_type:
5765 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5766 gcc_assert (done);
5767 break;
5768
5769 case assignment_vec_info_type:
5770 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5771 gcc_assert (done);
5772 break;
5773
5774 case load_vec_info_type:
b8698a0f 5775 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5776 slp_node_instance);
5777 gcc_assert (done);
5778 break;
5779
5780 case store_vec_info_type:
5781 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5782 gcc_assert (done);
0d0293ac 5783 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
5784 {
5785 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5786 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
5787 one are skipped, and their vec_stmt_info shouldn't be freed
5788 meanwhile. */
0d0293ac 5789 *grouped_store = true;
ebfd146a
IR
5790 if (STMT_VINFO_VEC_STMT (stmt_info))
5791 is_store = true;
5792 }
5793 else
5794 is_store = true;
5795 break;
5796
5797 case condition_vec_info_type:
f7e531cf 5798 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
5799 gcc_assert (done);
5800 break;
5801
5802 case call_vec_info_type:
190c2236 5803 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5804 stmt = gsi_stmt (*gsi);
ebfd146a
IR
5805 break;
5806
5807 case reduc_vec_info_type:
b5aeb3bb 5808 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
5809 gcc_assert (done);
5810 break;
5811
5812 default:
5813 if (!STMT_VINFO_LIVE_P (stmt_info))
5814 {
73fbfcad 5815 if (dump_enabled_p ())
78c60e3d
SS
5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5817 "stmt not supported.");
ebfd146a
IR
5818 gcc_unreachable ();
5819 }
5820 }
5821
5822 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5823 is being vectorized, but outside the immediately enclosing loop. */
5824 if (vec_stmt
a70d6342
IR
5825 && STMT_VINFO_LOOP_VINFO (stmt_info)
5826 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5827 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
5828 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5829 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5830 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5831 vect_used_in_outer_by_reduction))
ebfd146a 5832 {
a70d6342
IR
5833 struct loop *innerloop = LOOP_VINFO_LOOP (
5834 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
5835 imm_use_iterator imm_iter;
5836 use_operand_p use_p;
5837 tree scalar_dest;
5838 gimple exit_phi;
5839
73fbfcad 5840 if (dump_enabled_p ())
78c60e3d
SS
5841 dump_printf_loc (MSG_NOTE, vect_location,
5842 "Record the vdef for outer-loop vectorization.");
ebfd146a
IR
5843
5844 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5845 (to be used when vectorizing outer-loop stmts that use the DEF of
5846 STMT). */
5847 if (gimple_code (stmt) == GIMPLE_PHI)
5848 scalar_dest = PHI_RESULT (stmt);
5849 else
5850 scalar_dest = gimple_assign_lhs (stmt);
5851
5852 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5853 {
5854 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5855 {
5856 exit_phi = USE_STMT (use_p);
5857 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5858 }
5859 }
5860 }
5861
5862 /* Handle stmts whose DEF is used outside the loop-nest that is
5863 being vectorized. */
5864 if (STMT_VINFO_LIVE_P (stmt_info)
5865 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5866 {
5867 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5868 gcc_assert (done);
5869 }
5870
5871 if (vec_stmt)
83197f37 5872 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 5873
b8698a0f 5874 return is_store;
ebfd146a
IR
5875}
5876
5877
b8698a0f 5878/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
5879 stmt_vec_info. */
5880
5881void
5882vect_remove_stores (gimple first_stmt)
5883{
5884 gimple next = first_stmt;
5885 gimple tmp;
5886 gimple_stmt_iterator next_si;
5887
5888 while (next)
5889 {
78048b1c
JJ
5890 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5891
5892 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5893 if (is_pattern_stmt_p (stmt_info))
5894 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
5895 /* Free the attached stmt_vec_info and remove the stmt. */
5896 next_si = gsi_for_stmt (next);
3d3f2249 5897 unlink_stmt_vdef (next);
ebfd146a 5898 gsi_remove (&next_si, true);
3d3f2249 5899 release_defs (next);
ebfd146a
IR
5900 free_stmt_vec_info (next);
5901 next = tmp;
5902 }
5903}
5904
5905
5906/* Function new_stmt_vec_info.
5907
5908 Create and initialize a new stmt_vec_info struct for STMT. */
5909
5910stmt_vec_info
b8698a0f 5911new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 5912 bb_vec_info bb_vinfo)
ebfd146a
IR
5913{
5914 stmt_vec_info res;
5915 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5916
5917 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5918 STMT_VINFO_STMT (res) = stmt;
5919 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 5920 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 5921 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
5922 STMT_VINFO_LIVE_P (res) = false;
5923 STMT_VINFO_VECTYPE (res) = NULL;
5924 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 5925 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
5926 STMT_VINFO_IN_PATTERN_P (res) = false;
5927 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 5928 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
5929 STMT_VINFO_DATA_REF (res) = NULL;
5930
5931 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5932 STMT_VINFO_DR_OFFSET (res) = NULL;
5933 STMT_VINFO_DR_INIT (res) = NULL;
5934 STMT_VINFO_DR_STEP (res) = NULL;
5935 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5936
5937 if (gimple_code (stmt) == GIMPLE_PHI
5938 && is_loop_header_bb_p (gimple_bb (stmt)))
5939 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5940 else
8644a673
IR
5941 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5942
9771b263 5943 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 5944 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
5945 GROUP_FIRST_ELEMENT (res) = NULL;
5946 GROUP_NEXT_ELEMENT (res) = NULL;
5947 GROUP_SIZE (res) = 0;
5948 GROUP_STORE_COUNT (res) = 0;
5949 GROUP_GAP (res) = 0;
5950 GROUP_SAME_DR_STMT (res) = NULL;
5951 GROUP_READ_WRITE_DEPENDENCE (res) = false;
ebfd146a
IR
5952
5953 return res;
5954}
5955
5956
5957/* Create the vector of stmt_vec_info structs. */
5958
5959void
5960init_stmt_vec_info_vec (void)
5961{
9771b263
DN
5962 gcc_assert (!stmt_vec_info_vec.exists ());
5963 stmt_vec_info_vec.create (50);
ebfd146a
IR
5964}
5965
5966
5967/* Free the vector of stmt_vec_info structs. */
5968
5969void
5970free_stmt_vec_info_vec (void)
5971{
93675444
JJ
5972 unsigned int i;
5973 vec_void_p info;
5974 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
5975 if (info != NULL)
5976 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
9771b263
DN
5977 gcc_assert (stmt_vec_info_vec.exists ());
5978 stmt_vec_info_vec.release ();
ebfd146a
IR
5979}
5980
5981
5982/* Free stmt vectorization related info. */
5983
5984void
5985free_stmt_vec_info (gimple stmt)
5986{
5987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5988
5989 if (!stmt_info)
5990 return;
5991
78048b1c
JJ
5992 /* Check if this statement has a related "pattern stmt"
5993 (introduced by the vectorizer during the pattern recognition
5994 pass). Free the pattern's stmt_vec_info and its def stmts' stmt_vec_infos
5995 too. */
5996 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5997 {
5998 stmt_vec_info patt_info
5999 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6000 if (patt_info)
6001 {
363477c0
JJ
6002 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6003 if (seq)
6004 {
6005 gimple_stmt_iterator si;
6006 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6007 free_stmt_vec_info (gsi_stmt (si));
6008 }
78048b1c
JJ
6009 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6010 }
6011 }
6012
9771b263 6013 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
ebfd146a
IR
6014 set_vinfo_for_stmt (stmt, NULL);
6015 free (stmt_info);
6016}
6017
6018
bb67d9c7 6019/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6020
bb67d9c7 6021 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
6022 by the target. */
6023
bb67d9c7
RG
6024static tree
6025get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
6026{
6027 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6028 enum machine_mode simd_mode;
2f816591 6029 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
6030 int nunits;
6031 tree vectype;
6032
cc4b5170 6033 if (nbytes == 0)
ebfd146a
IR
6034 return NULL_TREE;
6035
48f2e373
RB
6036 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6037 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6038 return NULL_TREE;
6039
7b7b1813
RG
6040 /* For vector types of elements whose mode precision doesn't
6041 match their type's precision we use an element type of mode
6042 precision. The vectorization routines will have to make sure
48f2e373
RB
6043 they support the proper result truncation/extension.
6044 We also make sure to build vector types with INTEGER_TYPE
6045 component type only. */
6d7971b8 6046 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
6047 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6048 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
6049 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6050 TYPE_UNSIGNED (scalar_type));
6d7971b8 6051
ccbf5bb4
RG
6052 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6053 When the component mode passes the above test, simply use a type
6054 corresponding to that mode. The theory is that any use that
6055 would cause problems with this will disable vectorization anyway. */
dfc2e2ac
RB
6056 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6057 && !INTEGRAL_TYPE_P (scalar_type)
6058 && !POINTER_TYPE_P (scalar_type))
60b95d28
RB
6059 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6060
6061 /* We can't build a vector type of elements with alignment bigger than
6062 their size. */
dfc2e2ac 6063 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
6064 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6065 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 6066
dfc2e2ac
RB
6067 /* If we fell back to using the mode, fail if there was
6068 no scalar type for it. */
6069 if (scalar_type == NULL_TREE)
6070 return NULL_TREE;
6071
bb67d9c7
RG
6072 /* If no size was supplied use the mode the target prefers. Otherwise
6073 lookup a vector mode of the specified size. */
6074 if (size == 0)
6075 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6076 else
6077 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
6078 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6079 if (nunits <= 1)
6080 return NULL_TREE;
ebfd146a
IR
6081
6082 vectype = build_vector_type (scalar_type, nunits);
73fbfcad 6083 if (dump_enabled_p ())
ebfd146a 6084 {
78c60e3d
SS
6085 dump_printf_loc (MSG_NOTE, vect_location,
6086 "get vectype with %d units of type ", nunits);
6087 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
ebfd146a
IR
6088 }
6089
6090 if (!vectype)
6091 return NULL_TREE;
6092
73fbfcad 6093 if (dump_enabled_p ())
ebfd146a 6094 {
78c60e3d
SS
6095 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6096 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
ebfd146a
IR
6097 }
6098
6099 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6100 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6101 {
73fbfcad 6102 if (dump_enabled_p ())
78c60e3d
SS
6103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6104 "mode not supported by target.");
ebfd146a
IR
6105 return NULL_TREE;
6106 }
6107
6108 return vectype;
6109}
6110
bb67d9c7
RG
6111unsigned int current_vector_size;
6112
6113/* Function get_vectype_for_scalar_type.
6114
6115 Returns the vector type corresponding to SCALAR_TYPE as supported
6116 by the target. */
6117
6118tree
6119get_vectype_for_scalar_type (tree scalar_type)
6120{
6121 tree vectype;
6122 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6123 current_vector_size);
6124 if (vectype
6125 && current_vector_size == 0)
6126 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6127 return vectype;
6128}
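/* For example (illustrative; the modes depend on the target): if the
   target's preferred SIMD mode for SImode is V4SI, the first call
   get_vectype_for_scalar_type (int) returns vector(4) int and latches
   current_vector_size to 16; a later call for short then yields the
   same-sized vector(8) short rather than a differently sized vector.  */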
6129
b690cc0f
RG
6130/* Function get_same_sized_vectype
6131
6132 Returns a vector type corresponding to SCALAR_TYPE of size
6133 VECTOR_TYPE if supported by the target. */
6134
6135tree
bb67d9c7 6136get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6137{
bb67d9c7
RG
6138 return get_vectype_for_scalar_type_and_size
6139 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
6140}
6141
ebfd146a
IR
6142/* Function vect_is_simple_use.
6143
6144 Input:
a70d6342
IR
6145 LOOP_VINFO - the vect info of the loop that is being vectorized.
6146 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6147 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
6148 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6149
6150 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6151 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6152 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6153 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
6154 is the case in reduction/induction computations).
6155 For basic blocks, supportable operands are constants and bb invariants.
6156 For now, operands defined outside the basic block are not supported. */
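/* For example (a sketch): in

       x_1 = a_2 * 4;
       y_3 = x_1 + inv_5;

   where inv_5 is defined before the loop, the constant 4 is classified
   as vect_constant_def, inv_5 as vect_external_def, and x_1 as
   vect_internal_def.  */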
ebfd146a
IR
6157
6158bool
24ee1384 6159vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6160 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6161 tree *def, enum vect_def_type *dt)
b8698a0f 6162{
ebfd146a
IR
6163 basic_block bb;
6164 stmt_vec_info stmt_vinfo;
a70d6342 6165 struct loop *loop = NULL;
b8698a0f 6166
a70d6342
IR
6167 if (loop_vinfo)
6168 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
6169
6170 *def_stmt = NULL;
6171 *def = NULL_TREE;
b8698a0f 6172
73fbfcad 6173 if (dump_enabled_p ())
ebfd146a 6174 {
78c60e3d
SS
6175 dump_printf_loc (MSG_NOTE, vect_location,
6176 "vect_is_simple_use: operand ");
6177 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 6178 }
b8698a0f 6179
b758f602 6180 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
6181 {
6182 *dt = vect_constant_def;
6183 return true;
6184 }
b8698a0f 6185
ebfd146a
IR
6186 if (is_gimple_min_invariant (operand))
6187 {
6188 *def = operand;
8644a673 6189 *dt = vect_external_def;
ebfd146a
IR
6190 return true;
6191 }
6192
6193 if (TREE_CODE (operand) == PAREN_EXPR)
6194 {
73fbfcad 6195 if (dump_enabled_p ())
78c60e3d 6196 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
ebfd146a
IR
6197 operand = TREE_OPERAND (operand, 0);
6198 }
b8698a0f 6199
ebfd146a
IR
6200 if (TREE_CODE (operand) != SSA_NAME)
6201 {
73fbfcad 6202 if (dump_enabled_p ())
78c60e3d
SS
6203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6204 "not ssa-name.");
ebfd146a
IR
6205 return false;
6206 }
b8698a0f 6207
ebfd146a
IR
6208 *def_stmt = SSA_NAME_DEF_STMT (operand);
6209 if (*def_stmt == NULL)
6210 {
73fbfcad 6211 if (dump_enabled_p ())
78c60e3d
SS
6212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6213 "no def_stmt.");
ebfd146a
IR
6214 return false;
6215 }
6216
73fbfcad 6217 if (dump_enabled_p ())
ebfd146a 6218 {
78c60e3d
SS
6219 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6220 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
6221 }
6222
8644a673 6223 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
6224 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6225 if (gimple_nop_p (*def_stmt))
6226 {
6227 *def = operand;
8644a673 6228 *dt = vect_external_def;
ebfd146a
IR
6229 return true;
6230 }
6231
6232 bb = gimple_bb (*def_stmt);
a70d6342
IR
6233
6234 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6235 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6236 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6237 *dt = vect_external_def;
ebfd146a
IR
6238 else
6239 {
6240 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6241 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6242 }
6243
24ee1384
IR
6244 if (*dt == vect_unknown_def_type
6245 || (stmt
6246 && *dt == vect_double_reduction_def
6247 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6248 {
73fbfcad 6249 if (dump_enabled_p ())
78c60e3d
SS
6250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251 "Unsupported pattern.");
ebfd146a
IR
6252 return false;
6253 }
6254
73fbfcad 6255 if (dump_enabled_p ())
78c60e3d 6256 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
ebfd146a
IR
6257
6258 switch (gimple_code (*def_stmt))
6259 {
6260 case GIMPLE_PHI:
6261 *def = gimple_phi_result (*def_stmt);
6262 break;
6263
6264 case GIMPLE_ASSIGN:
6265 *def = gimple_assign_lhs (*def_stmt);
6266 break;
6267
6268 case GIMPLE_CALL:
6269 *def = gimple_call_lhs (*def_stmt);
6270 if (*def != NULL)
6271 break;
6272 /* FALLTHRU */
6273 default:
73fbfcad 6274 if (dump_enabled_p ())
78c60e3d
SS
6275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6276 "unsupported defining stmt: ");
ebfd146a
IR
6277 return false;
6278 }
6279
6280 return true;
6281}
6282
b690cc0f
RG
6283/* Function vect_is_simple_use_1.
6284
6285 Same as vect_is_simple_use but also determines the vector operand
6286 type of OPERAND and stores it to *VECTYPE. If the definition of
6287 OPERAND is vect_uninitialized_def, vect_constant_def or
6288 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6289 is responsible for computing the best suited vector type for the
6290 scalar operand. */
6291
6292bool
24ee1384 6293vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
6294 bb_vec_info bb_vinfo, gimple *def_stmt,
6295 tree *def, enum vect_def_type *dt, tree *vectype)
6296{
24ee1384
IR
6297 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6298 def, dt))
b690cc0f
RG
6299 return false;
6300
6301 /* Now get a vector type if the def is internal, otherwise supply
6302 NULL_TREE and leave it up to the caller to figure out a proper
6303 type for the use stmt. */
6304 if (*dt == vect_internal_def
6305 || *dt == vect_induction_def
6306 || *dt == vect_reduction_def
6307 || *dt == vect_double_reduction_def
6308 || *dt == vect_nested_cycle)
6309 {
6310 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
6311
6312 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6313 && !STMT_VINFO_RELEVANT (stmt_info)
6314 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 6315 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 6316
b690cc0f
RG
6317 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6318 gcc_assert (*vectype != NULL_TREE);
6319 }
6320 else if (*dt == vect_uninitialized_def
6321 || *dt == vect_constant_def
6322 || *dt == vect_external_def)
6323 *vectype = NULL_TREE;
6324 else
6325 gcc_unreachable ();
6326
6327 return true;
6328}
6329
ebfd146a
IR
6330
6331/* Function supportable_widening_operation
6332
b8698a0f
L
6333 Check whether an operation represented by the code CODE is a
6334 widening operation that is supported by the target platform in
b690cc0f
RG
6335 vector form (i.e., when operating on arguments of type VECTYPE_IN
6336 producing a result of type VECTYPE_OUT).
b8698a0f 6337
ebfd146a
IR
6338 Widening operations we currently support are NOP (CONVERT), FLOAT,
6339 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these are supported
6340 by the target platform either directly (via vector tree-codes), or via
6341 target builtins.
6342
6343 Output:
b8698a0f
L
6344 - CODE1 and CODE2 are codes of vector operations to be used when
6345 vectorizing the operation, if available.
ebfd146a
IR
6346 - MULTI_STEP_CVT determines the number of required intermediate steps in
6347 case of multi-step conversion (like char->short->int - in that case
6348 MULTI_STEP_CVT will be 1).
b8698a0f
L
6349 - INTERM_TYPES contains the intermediate type required to perform the
6350 widening operation (short in the above example). */
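/* For example (a sketch): widening a char vector to int is the two-step
   promotion char->short->int, so *MULTI_STEP_CVT is set to 1,
   INTERM_TYPES holds the intermediate short vector type, and CODE1/CODE2
   are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, applied at each step.  */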
ebfd146a
IR
6351
6352bool
b690cc0f
RG
6353supportable_widening_operation (enum tree_code code, gimple stmt,
6354 tree vectype_out, tree vectype_in,
ebfd146a
IR
6355 enum tree_code *code1, enum tree_code *code2,
6356 int *multi_step_cvt,
9771b263 6357 vec<tree> *interm_types)
ebfd146a
IR
6358{
6359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6360 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 6361 struct loop *vect_loop = NULL;
ebfd146a 6362 enum machine_mode vec_mode;
81f40b79 6363 enum insn_code icode1, icode2;
ebfd146a 6364 optab optab1, optab2;
b690cc0f
RG
6365 tree vectype = vectype_in;
6366 tree wide_vectype = vectype_out;
ebfd146a 6367 enum tree_code c1, c2;
4a00c761
JJ
6368 int i;
6369 tree prev_type, intermediate_type;
6370 enum machine_mode intermediate_mode, prev_mode;
6371 optab optab3, optab4;
ebfd146a 6372
4a00c761 6373 *multi_step_cvt = 0;
4ef69dfc
IR
6374 if (loop_info)
6375 vect_loop = LOOP_VINFO_LOOP (loop_info);
6376
ebfd146a
IR
6377 switch (code)
6378 {
6379 case WIDEN_MULT_EXPR:
6ae6116f
RH
6380 /* The result of a vectorized widening operation usually requires
6381 two vectors (because the widened results do not fit into one vector).
6382 The generated vector results would normally be expected to be
6383 generated in the same order as in the original scalar computation,
6384 i.e. if 8 results are generated in each vector iteration, they are
6385 to be organized as follows:
6386 vect1: [res1,res2,res3,res4],
6387 vect2: [res5,res6,res7,res8].
6388
6389 However, in the special case that the result of the widening
6390 operation is used in a reduction computation only, the order doesn't
6391 matter (because when vectorizing a reduction we change the order of
6392 the computation). Some targets can take advantage of this and
6393 generate more efficient code. For example, targets like Altivec,
6394 that support widen_mult using a sequence of {mult_even,mult_odd}
6395 generate the following vectors:
6396 vect1: [res1,res3,res5,res7],
6397 vect2: [res2,res4,res6,res8].
6398
6399 When vectorizing outer-loops, we execute the inner-loop sequentially
6400 (each vectorized inner-loop iteration contributes to VF outer-loop
6401 iterations in parallel). We therefore don't allow changing the
6402 order of the computation in the inner-loop during outer-loop
6403 vectorization. */
6404 /* TODO: Another case in which order doesn't *really* matter is when we
6405 widen and then contract again, e.g. (short)((int)x * y >> 8).
6406 Normally, pack_trunc performs an even/odd permute, whereas the
6407 repack from an even/odd expansion would be an interleave, which
6408 would be significantly simpler for e.g. AVX2. */
6409 /* In any case, in order to avoid duplicating the code below, recurse
6410 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6411 are properly set up for the caller. If we fail, we'll continue with
6412 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

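  /* Swap the LO/HI codes on big-endian targets, where (presumably) the
     _HI variants operate on the elements that come first in memory
     order.  The EVEN/ODD codes select elements by index, which is
     independent of endianness, so they need no swap.  (Explanatory
     note; the convention stated here is an assumption, not spelled out
     in the surrounding code.)  */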
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
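
/* A hypothetical caller sketch (the variable names here are
   illustrative, not taken from an actual call site):

     enum tree_code code1, code2;
     int multi_step_cvt;
     vec<tree> interm_types = vNULL;

     if (supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
                                         vectype_in, &code1, &code2,
                                         &multi_step_cvt, &interm_types))
       {
         ... emit *CODE1/*CODE2 statement pairs, one pair per step,
             going through the MULTI_STEP_CVT intermediate types
             recorded in INTERM_TYPES ...
       }

   On the failing paths INTERM_TYPES is either never created or already
   released, so a vNULL-initialized vector needs no cleanup.  */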


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:

   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps
     in case of multi-step conversion (like int->short->char - in that
     case MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

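/* For illustration (an added example, matching the int->short->char
   case above): a scalar loop such as

     int *a;
     char *b;
     for (i = 0; i < n; i++)
       b[i] = (char) a[i];

   narrows int to char.  On a target that only packs between adjacent
   vector element sizes this is done as int->short->char, so
   MULTI_STEP_CVT would be 1 and INTERM_TYPES would hold the short
   vector type.  */
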
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating-point to
     integer conversion over unsigned, as unsigned FIX_TRUNC_EXPR is
     often more costly than signed.  */
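  /* For instance (an illustrative case, not spelled out in the code
     below): a double -> unsigned short conversion can be performed via
     the signed variant, because the subsequent VEC_PACK_TRUNC steps
     merely truncate high-order bits, so any value in range of the
     unsigned narrow type yields the same bit pattern either way.  */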
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
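
/* A hypothetical caller sketch (variable names illustrative, not taken
   from an actual call site):

     enum tree_code code1;
     int multi_step_cvt;
     vec<tree> interm_types = vNULL;

     if (supportable_narrowing_operation (NOP_EXPR, vectype_out,
                                          vectype_in, &code1,
                                          &multi_step_cvt, &interm_types))
       {
         ... emit MULTI_STEP_CVT + 1 pack statements, narrowing through
             the intermediate types recorded in INTERM_TYPES ...
       }
  */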