/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}

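/* Illustrative only (not part of the original sources): a caller that
   wants the costs accumulated for later processing passes a cost
   vector, e.g.

     stmt_vector_for_cost body_cost_vec;
     body_cost_vec.create (2);
     unsigned estimate
       = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
                           stmt_info, 0, vect_body);

   whereas passing a NULL vector hands the cost straight to the
   target's add_stmt_cost hook.  The variable names above are
   hypothetical.  */
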
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

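/* Illustrative only: for a vector array VECT_ARRAY of four V4SI
   elements, read_vector_array (stmt, gsi, x, vect_array, 2) emits
   GIMPLE along the lines of

     vect_x.7_24 = vect_array[2];

   and write_vector_array (stmt, gsi, v_23, vect_array, 2) emits

     vect_array[2] = v_23;

   The SSA names shown are hypothetical.  */
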
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside the pattern; if LHS has other uses that
             are pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}

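/* Illustrative only: when a stmt S1 was recognized as part of a pattern
   and replaced by a pattern stmt S1', e.g.

     S1:  t_1 = x_2 * y_3;                    <- original stmt
     S1': t'_4 = WIDEN_MULT_EXPR <x_2, y_3>;  <- pattern stmt

   it is normally S1' that gets marked relevant and pushed on the
   worklist, since S1 itself will not be vectorized.  The statement and
   SSA names above are hypothetical.  */
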
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}

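/* Illustrative only: in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <- has a vdef: marked vect_used_in_scope
         s = s + b[i];      <- s is used after the loop: marked live
       }
     use (s);

   the store is relevant because it alters memory, and the reduction
   result is live because it is used outside the loop through its
   loop-closed exit phi.  */
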
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

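/* Illustrative only: for the store  a[i_5] = x_7;  the use i_5 serves
   only to index the array, so the function returns false for it, while
   the stored value x_7 is a real operand and the function returns
   true.  The SSA names are hypothetical.  */
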
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
     return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.");
                  worklist.release ();
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.");

                worklist.release ();
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.");

                worklist.release ();
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              worklist.release ();
              return false;
            }
        }
    } /* while worklist */

  worklist.release ();
  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

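/* Illustrative only: for a stmt copied ncopies = 4 times with one
   invariant operand, on a target where a vector_stmt costs 1 in both
   segments, this records prologue_cost = 1 (building the invariant
   vector once before the loop) and inside_cost = 4 (one vector op per
   copy in the loop body).  The unit costs are hypothetical.  */
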
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

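/* Illustrative only: for a two-step promotion (PWR = 1) the loop above
   sums vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   operations, whereas the corresponding two-step demotion sums
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, each scaled by the
   target's per-operation cost.  */
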
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

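/* Illustrative only: storing a group of GROUP_SIZE = 4 interleaved
   vectors with ncopies = 1 and no store-lanes support is charged
   nstmts = 1 * log2 (4) * 4 = 8 vec_perm operations on top of the
   stores themselves.  */
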
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

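/* Illustrative only: a strided load producing a V4SI vector with
   ncopies = 1 is costed as 1 * TYPE_VECTOR_SUBPARTS = 4 scalar_load
   operations plus one vec_construct to assemble the result.  */
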
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}

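/* Illustrative only: vectorizing with V4SI and VAL = 5, the function
   emits something like

     cst_.8_11 = {5, 5, 5, 5};

   in the loop preheader (or at GSI) and returns cst_.8_11.  The SSA
   name shown is hypothetical.  */
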
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

1570
d092494c
IR
1571/* Get vectorized definitions for OP0 and OP1.
1572 REDUC_INDEX is the index of reduction operand in case of reduction,
1573 and -1 otherwise. */
ebfd146a 1574
d092494c 1575void
ebfd146a 1576vect_get_vec_defs (tree op0, tree op1, gimple stmt,
9771b263
DN
1577 vec<tree> *vec_oprnds0,
1578 vec<tree> *vec_oprnds1,
d092494c 1579 slp_tree slp_node, int reduc_index)
ebfd146a
IR
1580{
1581 if (slp_node)
d092494c
IR
1582 {
1583 int nops = (op1 == NULL_TREE) ? 1 : 2;
9771b263
DN
1584 vec<tree> ops;
1585 ops.create (nops);
37b5ec8f 1586 vec<vec<tree> > vec_defs;
9771b263 1587 vec_defs.create (nops);
d092494c 1588
9771b263 1589 ops.quick_push (op0);
d092494c 1590 if (op1)
9771b263 1591 ops.quick_push (op1);
d092494c
IR
1592
1593 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1594
37b5ec8f 1595 *vec_oprnds0 = vec_defs[0];
d092494c 1596 if (op1)
37b5ec8f 1597 *vec_oprnds1 = vec_defs[1];
d092494c 1598
9771b263
DN
1599 ops.release ();
1600 vec_defs.release ();
d092494c 1601 }
ebfd146a
IR
1602 else
1603 {
1604 tree vec_oprnd;
1605
9771b263 1606 vec_oprnds0->create (1);
b8698a0f 1607 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1608 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1609
1610 if (op1)
1611 {
9771b263 1612 vec_oprnds1->create (1);
b8698a0f 1613 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1614 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1615 }
1616 }
1617}
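
/* Editor's note -- an illustrative sketch, not part of the original source.
   A typical non-SLP caller drives the two helpers above like this, assuming
   NCOPIES copies of the vectorized stmt are needed:

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                              NULL, -1);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
         ... build one vector stmt from vec_oprnds0/vec_oprnds1 ...
       }

   The first iteration materializes the initial vector defs; later
   iterations step along the STMT_VINFO_RELATED_STMT chains.  */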

/* Function vect_finish_stmt_generation.

   Insert the new vectorized stmt VEC_STMT, generated for the scalar
   stmt STMT, before the iterator GSI, and create a stmt_vec_info
   for it.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
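
/* Editor's note -- an illustrative sketch, not part of the original source.
   Suppose GSI points at a scalar store  *p = _1  whose virtual operands are
   # .MEM_7 = VDEF <.MEM_5>, and VEC_STMT is a vector store inserted before
   it.  The code above rewires virtual SSA form to:

     # .MEM_8 = VDEF <.MEM_5>
     MEM[vectorized store] = v_6;
     # .MEM_7 = VDEF <.MEM_8>
     *p = _1;

   where .MEM_8 is the freshly copied vdef, so the renamer need not run.  */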

/* Checks if CALL can be vectorized with result type VECTYPE_OUT and
   argument type VECTYPE_IN.  Returns a function declaration if the
   target has a vectorized version of the function, or NULL_TREE if
   the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
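
/* Editor's note -- an illustrative sketch, not part of the original source.
   The target hook consulted above maps a scalar builtin to a vector decl
   when one exists.  On a target with a V2DF square-root instruction it
   might, roughly, do:

     static tree
     example_builtin_vectorized_function (tree fndecl, tree type_out,
                                          tree type_in)
     {
       if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SQRT
           && TYPE_MODE (type_out) == V2DFmode
           && TYPE_MODE (type_in) == V2DFmode)
         return example_v2df_sqrt_decl;   (a hypothetical decl)
       return NULL_TREE;
     }

   Names prefixed with "example_" are hypothetical.  */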

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
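
  /* Editor's note -- an illustrative sketch, not part of the original
     source.  For example, a call consuming V2DF arguments (nunits_in = 2)
     while producing V4SI results (nunits_out = 4) gets modifier = NARROW:
     two vector arguments are consumed per vector result.  The opposite
     shape (nunits_out == nunits_in / 2) selects WIDEN, which the switch
     below currently rejects for lack of target support.  */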

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
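
  /* Editor's note -- an illustrative sketch, not part of the original
     source.  With a vectorization factor of 8 and V4SI vectors
     (nunits_in == nunits_out == 4, modifier == NONE), ncopies = 8 / 4 = 2:
     the transformation below emits two vector calls per scalar call and
     chains them via STMT_VINFO_RELATED_STMT.  */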

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest, NULL);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;
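
      /* Editor's note -- an illustrative sketch, not part of the original
         source.  For ncopies = 2 and a call  z = sinf (x), the NONE case
         above produces roughly

           vz.0 = vect_sinf (vx.0);
           vz.1 = vect_sinf (vx.1);

         where vect_sinf stands for whatever vectorized decl the target
         hook returned (the name is hypothetical).  */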

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2 * i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
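
/* Editor's note -- an illustrative sketch, not part of the original source.
   A widening operation produces results twice as wide as its inputs, so
   each input vector yields two output vectors.  Widening a V8HI multiply
   to int, for instance, typically uses a hi/lo pair of tree codes:

     vres_hi = VEC_WIDEN_MULT_HI_EXPR <va, vb>;   (upper 4 elements, V4SI)
     vres_lo = VEC_WIDEN_MULT_LO_EXPR <va, vb>;   (lower 4 elements, V4SI)

   This helper emits one such half per call; its caller invokes it twice,
   once with each of the two codes chosen during analysis.  */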


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
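
/* Editor's note -- an illustrative sketch, not part of the original source.
   Each invocation pushes two vector defs and recurses MULTI_STEP_CVT more
   times, so a call with MULTI_STEP_CVT = 1 collects four defs in all --
   exactly what a two-step demotion such as 4 x V4SI -> 2 x V8HI -> 1 x
   V16QI consumes to produce one final vector.  */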


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i / 2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i + 1) / 2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
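
/* Editor's note -- an illustrative sketch, not part of the original source.
   Demoting int to char with MULTI_STEP_CVT = 1 proceeds as:

     vs.0 = VEC_PACK_TRUNC_EXPR <vi.0, vi.1>;   (2 x V4SI -> V8HI)
     vs.1 = VEC_PACK_TRUNC_EXPR <vi.2, vi.3>;   (2 x V4SI -> V8HI)
     vc.0 = VEC_PACK_TRUNC_EXPR <vs.0, vs.1>;   (2 x V8HI -> V16QI)

   The first level halves the operand list; the recursive call then packs
   the intermediate results down to the destination type.  */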


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
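
/* Editor's note -- an illustrative sketch, not part of the original source.
   Promotion mirrors demotion: each pass over VEC_OPRNDS0 doubles the
   number of vectors, so promoting char to int with one intermediate step
   turns 1 x V16QI into 2 x V8HI and then 4 x V4SI, using the hi/lo
   unpack codes chosen by supportable_widening_operation.  */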


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
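
  /* Editor's note -- an illustrative sketch, not part of the original
     source.  For a char-to-float conversion on a 128-bit target,
     vectype_in = V16QI (nunits_in = 16) and vectype_out = V4SF
     (nunits_out = 4), so modifier = WIDEN; with a vectorization factor
     of 16 this gives ncopies = 16 / 16 = 1, and that single copy
     internally expands into four V4SF results.  */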

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
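
  /* Editor's note -- an illustrative sketch, not part of the original
     source.  The multi-step WIDEN search above handles cases like
     short -> float, where no single widening float conversion exists:
     it finds cvt_type = V4SI and decomposes the conversion as

       V8HI --(VEC_UNPACK_HI/LO_EXPR)--> 2 x V4SI
       V4SI --(FLOAT_EXPR)--> V4SF

     i.e. first widen the integers, then convert the full-width integers
     to float.  */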

  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e. - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e. - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
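
/* Editor's note -- an illustrative sketch, not part of the original source.
   This path covers plain vector copies and conversions that are no-ops at
   the vector level.  For

     unsigned int u = (unsigned int) s;   (s a signed int)

   both types use V4SI-sized vectors with the same element count, so the
   conversion becomes a single VIEW_CONVERT_EXPR of the input vector -- a
   register copy with no element reshuffling.  */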


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
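
/* Editor's note -- an illustrative sketch, not part of the original source.
   A pattern-recognition caller can use this predicate to decide whether
   rewriting, say, a division by a power of two into a shift pays off:

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... emit the shift-based pattern ...

   The check accepts the shift if the target supports it with either a
   scalar or a vector shift amount.  */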
3094
3095
9dc3f7de
IR
3096/* Function vectorizable_shift.
3097
3098 Check if STMT performs a shift operation that can be vectorized.
3099 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3100 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3101 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3102
3103static bool
3104vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3105 gimple *vec_stmt, slp_tree slp_node)
3106{
3107 tree vec_dest;
3108 tree scalar_dest;
3109 tree op0, op1 = NULL;
3110 tree vec_oprnd1 = NULL_TREE;
3111 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3112 tree vectype;
3113 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3114 enum tree_code code;
3115 enum machine_mode vec_mode;
3116 tree new_temp;
3117 optab optab;
3118 int icode;
3119 enum machine_mode optab_op2_mode;
3120 tree def;
3121 gimple def_stmt;
3122 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3123 gimple new_stmt = NULL;
3124 stmt_vec_info prev_stmt_info;
3125 int nunits_in;
3126 int nunits_out;
3127 tree vectype_out;
cede2577 3128 tree op1_vectype;
9dc3f7de
IR
3129 int ncopies;
3130 int j, i;
6e1aa848
DN
3131 vec<tree> vec_oprnds0 = vNULL;
3132 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
3133 tree vop0, vop1;
3134 unsigned int k;
49eab32e 3135 bool scalar_shift_arg = true;
9dc3f7de
IR
3136 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3137 int vf;
3138
3139 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3140 return false;
3141
3142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3143 return false;
3144
3145 /* Is STMT a vectorizable binary/unary operation? */
3146 if (!is_gimple_assign (stmt))
3147 return false;
3148
3149 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3150 return false;
3151
3152 code = gimple_assign_rhs_code (stmt);
3153
3154 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3155 || code == RROTATE_EXPR))
3156 return false;
3157
3158 scalar_dest = gimple_assign_lhs (stmt);
3159 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
3160 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3161 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3162 {
73fbfcad 3163 if (dump_enabled_p ())
78c60e3d
SS
3164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3165 "bit-precision shifts not supported.");
7b7b1813
RG
3166 return false;
3167 }
9dc3f7de
IR
3168
3169 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3170 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
3171 &def_stmt, &def, &dt[0], &vectype))
3172 {
73fbfcad 3173 if (dump_enabled_p ())
78c60e3d
SS
3174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3175 "use not simple.");
9dc3f7de
IR
3176 return false;
3177 }
3178 /* If op0 is an external or constant def use a vector type with
3179 the same size as the output vector type. */
3180 if (!vectype)
3181 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3182 if (vec_stmt)
3183 gcc_assert (vectype);
3184 if (!vectype)
3185 {
73fbfcad 3186 if (dump_enabled_p ())
78c60e3d
SS
3187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3188 "no vectype for scalar type ");
9dc3f7de
IR
3189 return false;
3190 }
3191
3192 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3193 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3194 if (nunits_out != nunits_in)
3195 return false;
3196
3197 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3198 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3199 &def, &dt[1], &op1_vectype))
9dc3f7de 3200 {
73fbfcad 3201 if (dump_enabled_p ())
78c60e3d
SS
3202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3203 "use not simple.");
9dc3f7de
IR
3204 return false;
3205 }
3206
3207 if (loop_vinfo)
3208 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3209 else
3210 vf = 1;
3211
3212 /* Multiple types in SLP are handled by creating the appropriate number of
3213 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3214 case of SLP. */
437f4a00 3215 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
3216 ncopies = 1;
3217 else
3218 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3219
3220 gcc_assert (ncopies >= 1);
3221
3222 /* Determine whether the shift amount is a vector, or scalar. If the
3223 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3224
49eab32e
JJ
3225 if (dt[1] == vect_internal_def && !slp_node)
3226 scalar_shift_arg = false;
3227 else if (dt[1] == vect_constant_def
3228 || dt[1] == vect_external_def
3229 || dt[1] == vect_internal_def)
3230 {
3231 /* In SLP, need to check whether the shift count is the same,
3232 in loops if it is a constant or invariant, it is always
3233 a scalar shift. */
3234 if (slp_node)
3235 {
9771b263 3236 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
3237 gimple slpstmt;
3238
9771b263 3239 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
3240 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3241 scalar_shift_arg = false;
3242 }
3243 }
3244 else
3245 {
73fbfcad 3246 if (dump_enabled_p ())
78c60e3d
SS
3247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3248 "operand mode requires invariant argument.");
3249 return false;
3250 }
3251
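/* Illustrative examples of the classification above (added for
   exposition):
     a[i] = b[i] << 3;     dt[1] == vect_constant_def -> scalar shift arg
     a[i] = b[i] << c;     c loop-invariant           -> scalar shift arg
     a[i] = b[i] << c[i];  dt[1] == vect_internal_def -> vector shift arg
   In SLP the constant/invariant case additionally requires all stmts in
   the node to use the same shift amount, as checked above.  */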
3252 /* Vector shifted by vector. */
3253 if (!scalar_shift_arg)
3254 {
3255 optab = optab_for_tree_code (code, vectype, optab_vector);
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_NOTE, vect_location,
3258 "vector/vector shift/rotate found.");
3259
3260 if (!op1_vectype)
3261 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3262 if (op1_vectype == NULL_TREE
3263 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3264 {
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3267 "unusable type for last operand in"
3268 " vector/vector shift/rotate.");
3269 return false;
3270 }
3271 }
3272 /* See if the machine has a vector shifted by scalar insn and if not
3273 then see if it has a vector shifted by vector insn. */
3274 else
3275 {
3276 optab = optab_for_tree_code (code, vectype, optab_scalar);
3277 if (optab
3278 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3279 {
3280 if (dump_enabled_p ())
3281 dump_printf_loc (MSG_NOTE, vect_location,
3282 "vector/scalar shift/rotate found.");
3283 }
3284 else
3285 {
3286 optab = optab_for_tree_code (code, vectype, optab_vector);
3287 if (optab
3288 && (optab_handler (optab, TYPE_MODE (vectype))
3289 != CODE_FOR_nothing))
3290 {
3291 scalar_shift_arg = false;
3292
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE, vect_location,
3295 "vector/vector shift/rotate found.");
3296
3297 /* Unlike the other binary operators, shifts/rotates have
3298 the rhs being int, instead of the same type as the lhs,
3299 so make sure the scalar is the right type if we are
3300 dealing with vectors of long long/long/short/char. */
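/* For instance (exposition only): for a V2DI shift by the constant 3,
   the constant has type int while the vector elements are long long;
   the fold_convert below rewrites it to (long long) 3 so that the
   vector/vector shift sees an operand of the element type.  */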
3301 if (dt[1] == vect_constant_def)
3302 op1 = fold_convert (TREE_TYPE (vectype), op1);
3303 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3304 TREE_TYPE (op1)))
3305 {
3306 if (slp_node
3307 && TYPE_MODE (TREE_TYPE (vectype))
3308 != TYPE_MODE (TREE_TYPE (op1)))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "unusable type for last operand in"
3313 " vector/vector shift/rotate.");
3314 return false;
3315 }
3316 if (vec_stmt && !slp_node)
3317 {
3318 op1 = fold_convert (TREE_TYPE (vectype), op1);
3319 op1 = vect_init_vector (stmt, op1,
3320 TREE_TYPE (vectype), NULL);
3321 }
3322 }
3323 }
3324 }
3325 }
3326
3327 /* Supportable by target? */
3328 if (!optab)
3329 {
3330 if (dump_enabled_p ())
3331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3332 "no optab.");
3333 return false;
3334 }
3335 vec_mode = TYPE_MODE (vectype);
3336 icode = (int) optab_handler (optab, vec_mode);
3337 if (icode == CODE_FOR_nothing)
3338 {
3339 if (dump_enabled_p ())
3340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3341 "op not supported by target.");
3342 /* Check only during analysis. */
3343 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3344 || (vf < vect_min_worthwhile_factor (code)
3345 && !vec_stmt))
3346 return false;
3347 if (dump_enabled_p ())
3348 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3349 }
3350
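/* Added note: the word-mode fallback above only applies when the whole
   vector occupies exactly one machine word (GET_MODE_SIZE (vec_mode)
   == UNITS_PER_WORD), in which case the operation can still be
   expanded using integer word arithmetic.  */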
3351 /* Worthwhile without SIMD support? Check only during analysis. */
3352 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3353 && vf < vect_min_worthwhile_factor (code)
3354 && !vec_stmt)
3355 {
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "not worthwhile without SIMD support.");
3359 return false;
3360 }
3361
3362 if (!vec_stmt) /* transformation not required. */
3363 {
3364 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3367 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3368 return true;
3369 }
3370
3371 /** Transform. **/
3372
3373 if (dump_enabled_p ())
3374 dump_printf_loc (MSG_NOTE, vect_location,
3375 "transform binary/unary operation.");
3376
3377 /* Handle def. */
3378 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3379
3380 prev_stmt_info = NULL;
3381 for (j = 0; j < ncopies; j++)
3382 {
3383 /* Handle uses. */
3384 if (j == 0)
3385 {
3386 if (scalar_shift_arg)
3387 {
3388 /* Vector shl and shr insn patterns can be defined with scalar
3389 operand 2 (shift operand). In this case, use constant or loop
3390 invariant op1 directly, without extending it to vector mode
3391 first. */
3392 optab_op2_mode = insn_data[icode].operand[2].mode;
3393 if (!VECTOR_MODE_P (optab_op2_mode))
3394 {
3395 if (dump_enabled_p ())
3396 dump_printf_loc (MSG_NOTE, vect_location,
3397 "operand 1 using scalar mode.");
3398 vec_oprnd1 = op1;
3399 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3400 vec_oprnds1.quick_push (vec_oprnd1);
3401 if (slp_node)
3402 {
3403 /* Store vec_oprnd1 for every vector stmt to be created
3404 for SLP_NODE. We check during the analysis that all
3405 the shift arguments are the same.
3406 TODO: Allow different constants for different vector
3407 stmts generated for an SLP instance. */
3408 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3409 vec_oprnds1.quick_push (vec_oprnd1);
3410 }
3411 }
3412 }
3413
3414 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3415 (a special case for certain kinds of vector shifts); otherwise,
3416 operand 1 should be of a vector type (the usual case). */
3417 if (vec_oprnd1)
3418 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3419 slp_node, -1);
3420 else
3421 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3422 slp_node, -1);
3423 }
3424 else
3425 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3426
3427 /* Arguments are ready. Create the new vector stmt. */
3428 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3429 {
3430 vop1 = vec_oprnds1[i];
3431 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3432 new_temp = make_ssa_name (vec_dest, new_stmt);
3433 gimple_assign_set_lhs (new_stmt, new_temp);
3434 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3435 if (slp_node)
3436 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3437 }
3438
3439 if (slp_node)
3440 continue;
3441
3442 if (j == 0)
3443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3444 else
3445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3446 prev_stmt_info = vinfo_for_stmt (new_stmt);
3447 }
3448
3449 vec_oprnds0.release ();
3450 vec_oprnds1.release ();
3451
3452 return true;
3453}
3454
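/* As a worked example (not part of the original source), a loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] << 3;

   first reaches vectorizable_shift with VEC_STMT == NULL (analysis
   only); once analysis succeeds it is called again during the
   transformation and emits one vector shift per copy, preferring the
   vector/scalar shift optab when the target provides it.  */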
3455
3456static tree permute_vec_elements (tree, tree, tree, gimple,
3457 gimple_stmt_iterator *);
3458
3459
3460/* Function vectorizable_operation.
3461
3462 Check if STMT performs a binary, unary or ternary operation that can
3463 be vectorized.
3464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3465 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3467
3468static bool
3469vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3470 gimple *vec_stmt, slp_tree slp_node)
3471{
3472 tree vec_dest;
3473 tree scalar_dest;
3474 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3476 tree vectype;
3477 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3478 enum tree_code code;
3479 enum machine_mode vec_mode;
3480 tree new_temp;
3481 int op_type;
3482 optab optab;
3483 int icode;
3484 tree def;
3485 gimple def_stmt;
3486 enum vect_def_type dt[3]
3487 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3488 gimple new_stmt = NULL;
3489 stmt_vec_info prev_stmt_info;
3490 int nunits_in;
3491 int nunits_out;
3492 tree vectype_out;
3493 int ncopies;
3494 int j, i;
3495 vec<tree> vec_oprnds0 = vNULL;
3496 vec<tree> vec_oprnds1 = vNULL;
3497 vec<tree> vec_oprnds2 = vNULL;
3498 tree vop0, vop1, vop2;
3499 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3500 int vf;
3501
3502 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3503 return false;
3504
3505 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3506 return false;
3507
3508 /* Is STMT a vectorizable binary/unary/ternary operation? */
3509 if (!is_gimple_assign (stmt))
3510 return false;
3511
3512 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3513 return false;
3514
3515 code = gimple_assign_rhs_code (stmt);
3516
3517 /* For pointer addition, we should use the normal plus for
3518 the vector addition. */
3519 if (code == POINTER_PLUS_EXPR)
3520 code = PLUS_EXPR;
3521
3522 /* Support only unary, binary, or ternary operations. */
3523 op_type = TREE_CODE_LENGTH (code);
3524 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3525 {
3526 if (dump_enabled_p ())
3527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3528 "num. args = %d (not unary/binary/ternary op).",
3529 op_type);
3530 return false;
3531 }
3532
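/* For exposition (added commentary): TREE_CODE_LENGTH gives 1 for
   NEGATE_EXPR (unary_op), 2 for PLUS_EXPR (binary_op) and 3 for
   FMA_EXPR (ternary_op); any other arity was rejected above.  */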
3533 scalar_dest = gimple_assign_lhs (stmt);
3534 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3535
3536 /* Most operations cannot handle bit-precision types without extra
3537 truncations. */
3538 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3539 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3540 /* Exceptions are bitwise binary operations. */
3541 && code != BIT_IOR_EXPR
3542 && code != BIT_XOR_EXPR
3543 && code != BIT_AND_EXPR)
3544 {
3545 if (dump_enabled_p ())
3546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3547 "bit-precision arithmetic not supported.");
3548 return false;
3549 }
3550
3551 op0 = gimple_assign_rhs1 (stmt);
3552 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3553 &def_stmt, &def, &dt[0], &vectype))
3554 {
3555 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3557 "use not simple.");
3558 return false;
3559 }
3560 /* If op0 is an external or constant def use a vector type with
3561 the same size as the output vector type. */
3562 if (!vectype)
3563 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3564 if (vec_stmt)
3565 gcc_assert (vectype);
3566 if (!vectype)
3567 {
3568 if (dump_enabled_p ())
3569 {
3570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3571 "no vectype for scalar type ");
3572 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3573 TREE_TYPE (op0));
3574 }
3575
3576 return false;
3577 }
3578
3579 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3580 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3581 if (nunits_out != nunits_in)
3582 return false;
3583
3584 if (op_type == binary_op || op_type == ternary_op)
3585 {
3586 op1 = gimple_assign_rhs2 (stmt);
3587 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3588 &def, &dt[1]))
3589 {
3590 if (dump_enabled_p ())
3591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3592 "use not simple.");
3593 return false;
3594 }
3595 }
3596 if (op_type == ternary_op)
3597 {
3598 op2 = gimple_assign_rhs3 (stmt);
3599 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3600 &def, &dt[2]))
3601 {
3602 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3604 "use not simple.");
3605 return false;
3606 }
3607 }
3608
3609 if (loop_vinfo)
3610 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3611 else
3612 vf = 1;
3613
3614 /* Multiple types in SLP are handled by creating the appropriate number of
3615 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3616 case of SLP. */
3617 if (slp_node || PURE_SLP_STMT (stmt_info))
3618 ncopies = 1;
3619 else
3620 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3621
3622 gcc_assert (ncopies >= 1);
3623
3624 /* Shifts are handled in vectorizable_shift (). */
3625 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3626 || code == RROTATE_EXPR)
3627 return false;
3628
3629 /* Supportable by target? */
3630
3631 vec_mode = TYPE_MODE (vectype);
3632 if (code == MULT_HIGHPART_EXPR)
3633 {
3634 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3635 icode = LAST_INSN_CODE;
3636 else
3637 icode = CODE_FOR_nothing;
3638 }
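/* Added commentary: LAST_INSN_CODE is used above only as a sentinel
   meaning "supported".  MULT_HIGHPART_EXPR has no single optab entry;
   can_mult_highpart_p decides whether the target can expand it, so any
   value other than CODE_FOR_nothing suffices here.  */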
3639 else
3640 {
3641 optab = optab_for_tree_code (code, vectype, optab_default);
3642 if (!optab)
3643 {
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3646 "no optab.");
3647 return false;
3648 }
3649 icode = (int) optab_handler (optab, vec_mode);
3650 }
3651
3652 if (icode == CODE_FOR_nothing)
3653 {
3654 if (dump_enabled_p ())
3655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3656 "op not supported by target.");
3657 /* Check only during analysis. */
3658 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3659 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3660 return false;
3661 if (dump_enabled_p ())
3662 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3663 }
3664
3665 /* Worthwhile without SIMD support? Check only during analysis. */
3666 if (!VECTOR_MODE_P (vec_mode)
3667 && !vec_stmt
3668 && vf < vect_min_worthwhile_factor (code))
3669 {
3670 if (dump_enabled_p ())
3671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3672 "not worthwhile without SIMD support.");
3673 return false;
3674 }
3675
3676 if (!vec_stmt) /* transformation not required. */
3677 {
3678 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3679 if (dump_enabled_p ())
3680 dump_printf_loc (MSG_NOTE, vect_location,
3681 "=== vectorizable_operation ===");
3682 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3683 return true;
3684 }
3685
3686 /** Transform. **/
3687
3688 if (dump_enabled_p ())
3689 dump_printf_loc (MSG_NOTE, vect_location,
3690 "transform binary/unary operation.");
3691
3692 /* Handle def. */
3693 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3694
3695 /* In case the vectorization factor (VF) is bigger than the number
3696 of elements that we can fit in a vectype (nunits), we have to generate
3697 more than one vector stmt - i.e - we need to "unroll" the
3698 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3699 from one copy of the vector stmt to the next, in the field
3700 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3701 stages to find the correct vector defs to be used when vectorizing
3702 stmts that use the defs of the current stmt. The example below
3703 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3704 we need to create 4 vectorized stmts):
3705
3706 before vectorization:
3707 RELATED_STMT VEC_STMT
3708 S1: x = memref - -
3709 S2: z = x + 1 - -
3710
3711 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3712 there):
3713 RELATED_STMT VEC_STMT
3714 VS1_0: vx0 = memref0 VS1_1 -
3715 VS1_1: vx1 = memref1 VS1_2 -
3716 VS1_2: vx2 = memref2 VS1_3 -
3717 VS1_3: vx3 = memref3 - -
3718 S1: x = load - VS1_0
3719 S2: z = x + 1 - -
3720
3721 step2: vectorize stmt S2 (done here):
3722 To vectorize stmt S2 we first need to find the relevant vector
3723 def for the first operand 'x'. This is, as usual, obtained from
3724 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3725 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3726 relevant vector def 'vx0'. Having found 'vx0' we can generate
3727 the vector stmt VS2_0, and as usual, record it in the
3728 STMT_VINFO_VEC_STMT of stmt S2.
3729 When creating the second copy (VS2_1), we obtain the relevant vector
3730 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3731 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3732 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3733 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3734 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3735 chain of stmts and pointers:
3736 RELATED_STMT VEC_STMT
3737 VS1_0: vx0 = memref0 VS1_1 -
3738 VS1_1: vx1 = memref1 VS1_2 -
3739 VS1_2: vx2 = memref2 VS1_3 -
3740 VS1_3: vx3 = memref3 - -
3741 S1: x = load - VS1_0
3742 VS2_0: vz0 = vx0 + v1 VS2_1 -
3743 VS2_1: vz1 = vx1 + v1 VS2_2 -
3744 VS2_2: vz2 = vx2 + v1 VS2_3 -
3745 VS2_3: vz3 = vx3 + v1 - -
3746 S2: z = x + 1 - VS2_0 */
3747
3748 prev_stmt_info = NULL;
3749 for (j = 0; j < ncopies; j++)
3750 {
3751 /* Handle uses. */
3752 if (j == 0)
3753 {
3754 if (op_type == binary_op || op_type == ternary_op)
3755 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3756 slp_node, -1);
3757 else
3758 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3759 slp_node, -1);
3760 if (op_type == ternary_op)
3761 {
3762 vec_oprnds2.create (1);
3763 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3764 stmt,
3765 NULL));
3766 }
3767 }
3768 else
3769 {
3770 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3771 if (op_type == ternary_op)
3772 {
3773 tree vec_oprnd = vec_oprnds2.pop ();
3774 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3775 vec_oprnd));
3776 }
3777 }
3778
3779 /* Arguments are ready. Create the new vector stmt. */
3780 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3781 {
3782 vop1 = ((op_type == binary_op || op_type == ternary_op)
3783 ? vec_oprnds1[i] : NULL_TREE);
3784 vop2 = ((op_type == ternary_op)
3785 ? vec_oprnds2[i] : NULL_TREE);
3786 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3787 vop0, vop1, vop2);
3788 new_temp = make_ssa_name (vec_dest, new_stmt);
3789 gimple_assign_set_lhs (new_stmt, new_temp);
3790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3791 if (slp_node)
3792 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3793 }
3794
3795 if (slp_node)
3796 continue;
3797
3798 if (j == 0)
3799 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3800 else
3801 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3802 prev_stmt_info = vinfo_for_stmt (new_stmt);
3803 }
3804
3805 vec_oprnds0.release ();
3806 vec_oprnds1.release ();
3807 vec_oprnds2.release ();
3808
3809 return true;
3810}
3811
3812/* A helper function to ensure data reference DR's base alignment
3813 for STMT_INFO. */
3814
3815static void
3816ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3817{
3818 if (!dr->aux)
3819 return;
3820
3821 if (((dataref_aux *)dr->aux)->base_misaligned)
3822 {
3823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3824 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3825
3826 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3827 DECL_USER_ALIGN (base_decl) = 1;
3828 ((dataref_aux *)dr->aux)->base_misaligned = false;
3829 }
3830}
3831
3832
3833/* Function vectorizable_store.
3834
3835 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3836 can be vectorized.
3837 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3838 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3839 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3840
3841static bool
3842vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3843 slp_tree slp_node)
3844{
3845 tree scalar_dest;
3846 tree data_ref;
3847 tree op;
3848 tree vec_oprnd = NULL_TREE;
3849 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3850 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3851 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3852 tree elem_type;
3853 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3854 struct loop *loop = NULL;
3855 enum machine_mode vec_mode;
3856 tree dummy;
3857 enum dr_alignment_support alignment_support_scheme;
3858 tree def;
3859 gimple def_stmt;
3860 enum vect_def_type dt;
3861 stmt_vec_info prev_stmt_info = NULL;
3862 tree dataref_ptr = NULL_TREE;
3863 tree dataref_offset = NULL_TREE;
3864 gimple ptr_incr = NULL;
3865 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3866 int ncopies;
3867 int j;
3868 gimple next_stmt, first_stmt = NULL;
3869 bool grouped_store = false;
3870 bool store_lanes_p = false;
3871 unsigned int group_size, i;
3872 vec<tree> dr_chain = vNULL;
3873 vec<tree> oprnds = vNULL;
3874 vec<tree> result_chain = vNULL;
3875 bool inv_p;
3876 vec<tree> vec_oprnds = vNULL;
3877 bool slp = (slp_node != NULL);
3878 unsigned int vec_num;
3879 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3880 tree aggr_type;
3881
3882 if (loop_vinfo)
3883 loop = LOOP_VINFO_LOOP (loop_vinfo);
3884
3885 /* Multiple types in SLP are handled by creating the appropriate number of
3886 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3887 case of SLP. */
3888 if (slp || PURE_SLP_STMT (stmt_info))
3889 ncopies = 1;
3890 else
3891 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3892
3893 gcc_assert (ncopies >= 1);
3894
3895 /* FORNOW. This restriction should be relaxed. */
3896 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3897 {
3898 if (dump_enabled_p ())
3899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3900 "multiple types in nested loop.");
3901 return false;
3902 }
3903
3904 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3905 return false;
3906
3907 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3908 return false;
3909
3910 /* Is vectorizable store? */
3911
3912 if (!is_gimple_assign (stmt))
3913 return false;
3914
3915 scalar_dest = gimple_assign_lhs (stmt);
3916 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3917 && is_pattern_stmt_p (stmt_info))
3918 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3919 if (TREE_CODE (scalar_dest) != ARRAY_REF
3920 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3921 && TREE_CODE (scalar_dest) != INDIRECT_REF
3922 && TREE_CODE (scalar_dest) != COMPONENT_REF
3923 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3924 && TREE_CODE (scalar_dest) != REALPART_EXPR
3925 && TREE_CODE (scalar_dest) != MEM_REF)
3926 return false;
3927
3928 gcc_assert (gimple_assign_single_p (stmt));
3929 op = gimple_assign_rhs1 (stmt);
3930 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3931 &def, &dt))
3932 {
3933 if (dump_enabled_p ())
3934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3935 "use not simple.");
3936 return false;
3937 }
3938
3939 elem_type = TREE_TYPE (vectype);
3940 vec_mode = TYPE_MODE (vectype);
3941
3942 /* FORNOW. In some cases can vectorize even if data-type not supported
3943 (e.g. - array initialization with 0). */
3944 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3945 return false;
3946
3947 if (!STMT_VINFO_DATA_REF (stmt_info))
3948 return false;
3949
3950 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3951 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3952 size_zero_node) < 0)
3953 {
3954 if (dump_enabled_p ())
3955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3956 "negative step for store.");
3957 return false;
3958 }
3959
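/* E.g. (illustrative, not from the original source) a store written
   with a descending index,

     for (i = n - 1; i >= 0; i--)
       a[i] = x;

   has a negative DR_STEP and is rejected by the check above.  */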
3960 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3961 {
3962 grouped_store = true;
3963 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3964 if (!slp && !PURE_SLP_STMT (stmt_info))
3965 {
3966 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3967 if (vect_store_lanes_supported (vectype, group_size))
3968 store_lanes_p = true;
3969 else if (!vect_grouped_store_supported (vectype, group_size))
3970 return false;
3971 }
3972
3973 if (first_stmt == stmt)
3974 {
3975 /* STMT is the leader of the group. Check the operands of all the
3976 stmts of the group. */
3977 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3978 while (next_stmt)
3979 {
3980 gcc_assert (gimple_assign_single_p (next_stmt));
3981 op = gimple_assign_rhs1 (next_stmt);
3982 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3983 &def_stmt, &def, &dt))
ebfd146a 3984 {
3985 if (dump_enabled_p ())
3986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3987 "use not simple.");
3988 return false;
3989 }
3990 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3991 }
3992 }
3993 }
3994
3995 if (!vec_stmt) /* transformation not required. */
3996 {
3997 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3998 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3999 NULL, NULL, NULL);
4000 return true;
4001 }
4002
4003 /** Transform. **/
4004
4005 ensure_base_align (stmt_info, dr);
4006
4007 if (grouped_store)
4008 {
4009 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4010 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4011
4012 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4013
4014 /* FORNOW */
4015 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
4016
4017 /* We vectorize all the stmts of the interleaving group when we
4018 reach the last stmt in the group. */
4019 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4020 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
4021 && !slp)
4022 {
4023 *vec_stmt = NULL;
4024 return true;
4025 }
4026
4027 if (slp)
4028 {
4029 grouped_store = false;
4030 /* VEC_NUM is the number of vect stmts to be created for this
4031 group. */
4032 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4033 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4034 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4035 op = gimple_assign_rhs1 (first_stmt);
4036 }
4037 else
4038 /* VEC_NUM is the number of vect stmts to be created for this
4039 group. */
4040 vec_num = group_size;
4041 }
4042 else
4043 {
4044 first_stmt = stmt;
4045 first_dr = dr;
4046 group_size = vec_num = 1;
4047 }
4048
4049 if (dump_enabled_p ())
4050 dump_printf_loc (MSG_NOTE, vect_location,
4051 "transform store. ncopies = %d", ncopies);
4052
4053 dr_chain.create (group_size);
4054 oprnds.create (group_size);
4055
4056 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4057 gcc_assert (alignment_support_scheme);
4058 /* Targets with store-lane instructions must not require explicit
4059 realignment. */
4060 gcc_assert (!store_lanes_p
4061 || alignment_support_scheme == dr_aligned
4062 || alignment_support_scheme == dr_unaligned_supported);
4063
4064 if (store_lanes_p)
4065 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4066 else
4067 aggr_type = vectype;
4068
4069 /* In case the vectorization factor (VF) is bigger than the number
4070 of elements that we can fit in a vectype (nunits), we have to generate
4071 more than one vector stmt - i.e - we need to "unroll" the
4072 vector stmt by a factor VF/nunits. For more details see documentation in
4073 vect_get_vec_def_for_copy_stmt. */
4074
4075 /* In case of interleaving (non-unit grouped access):
4076
4077 S1: &base + 2 = x2
4078 S2: &base = x0
4079 S3: &base + 1 = x1
4080 S4: &base + 3 = x3
4081
4082 We create vectorized stores starting from base address (the access of the
4083 first stmt in the chain (S2 in the above example), when the last store stmt
4084 of the chain (S4) is reached:
4085
4086 VS1: &base = vx2
4087 VS2: &base + vec_size*1 = vx0
4088 VS3: &base + vec_size*2 = vx1
4089 VS4: &base + vec_size*3 = vx3
4090
4091 Then permutation statements are generated:
4092
4093 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4094 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 4095 ...
4096
4097 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4098 (the order of the data-refs in the output of vect_permute_store_chain
4099 corresponds to the order of scalar stmts in the interleaving chain - see
4100 the documentation of vect_permute_store_chain()).
4101
4102 In case of both multiple types and interleaving, above vector stores and
4103 permutation stmts are created for every copy. The result vector stmts are
4104 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4105 STMT_VINFO_RELATED_STMT for the next copies.
4106 */
4107
4108 prev_stmt_info = NULL;
4109 for (j = 0; j < ncopies; j++)
4110 {
4111 gimple new_stmt;
4112
4113 if (j == 0)
4114 {
4115 if (slp)
4116 {
4117 /* Get vectorized arguments for SLP_NODE. */
4118 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4119 NULL, slp_node, -1);
ebfd146a 4120
4121 vec_oprnd = vec_oprnds[0];
4122 }
4123 else
4124 {
4125 /* For interleaved stores we collect vectorized defs for all the
4126 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4127 used as an input to vect_permute_store_chain(), and OPRNDS as
4128 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4129
4130 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4131 OPRNDS are of size 1. */
4132 next_stmt = first_stmt;
4133 for (i = 0; i < group_size; i++)
4134 {
4135 /* Since gaps are not supported for interleaved stores,
4136 GROUP_SIZE is the exact number of stmts in the chain.
4137 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4138 there is no interleaving, GROUP_SIZE is 1, and only one
4139 iteration of the loop will be executed. */
4140 gcc_assert (next_stmt
4141 && gimple_assign_single_p (next_stmt));
4142 op = gimple_assign_rhs1 (next_stmt);
4143
4144 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4145 NULL);
4146 dr_chain.quick_push (vec_oprnd);
4147 oprnds.quick_push (vec_oprnd);
4148 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4149 }
4150 }
4151
4152 /* We should have caught mismatched types earlier. */
4153 gcc_assert (useless_type_conversion_p (vectype,
4154 TREE_TYPE (vec_oprnd)));
4155 bool simd_lane_access_p
4156 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4157 if (simd_lane_access_p
4158 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4159 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4160 && integer_zerop (DR_OFFSET (first_dr))
4161 && integer_zerop (DR_INIT (first_dr))
4162 && alias_sets_conflict_p (get_alias_set (aggr_type),
4163 get_alias_set (DR_REF (first_dr))))
4164 {
4165 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4166 dataref_offset = build_int_cst (reference_alias_ptr_type
4167 (DR_REF (first_dr)), 0);
4168 }
4169 else
4170 dataref_ptr
4171 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4172 simd_lane_access_p ? loop : NULL,
4173 NULL_TREE, &dummy, gsi, &ptr_incr,
4174 simd_lane_access_p, &inv_p);
4175 gcc_assert (bb_vinfo || !inv_p);
4176 }
4177 else
4178 {
4179 /* For interleaved stores we created vectorized defs for all the
4180 defs stored in OPRNDS in the previous iteration (previous copy).
4181 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4182 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4183 next copy.
4184 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4185 OPRNDS are of size 1. */
4186 for (i = 0; i < group_size; i++)
4187 {
4188 op = oprnds[i];
4189 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4190 &def, &dt);
4191 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4192 dr_chain[i] = vec_oprnd;
4193 oprnds[i] = vec_oprnd;
4194 }
4195 if (dataref_offset)
4196 dataref_offset
4197 = int_const_binop (PLUS_EXPR, dataref_offset,
4198 TYPE_SIZE_UNIT (aggr_type));
4199 else
4200 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4201 TYPE_SIZE_UNIT (aggr_type));
4202 }
4203
4204 if (store_lanes_p)
4205 {
4206 tree vec_array;
4207
4208 /* Combine all the vectors into an array. */
4209 vec_array = create_vector_array (vectype, vec_num);
4210 for (i = 0; i < vec_num; i++)
4211 {
4212 vec_oprnd = dr_chain[i];
4213 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4214 }
4215
4216 /* Emit:
4217 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4218 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4219 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4220 gimple_call_set_lhs (new_stmt, data_ref);
4221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4222 }
4223 else
4224 {
4225 new_stmt = NULL;
4226 if (grouped_store)
4227 {
4228 if (j == 0)
4229 result_chain.create (group_size);
4230 /* Permute. */
4231 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4232 &result_chain);
4233 }
4234
4235 next_stmt = first_stmt;
4236 for (i = 0; i < vec_num; i++)
4237 {
4238 unsigned align, misalign;
4239
4240 if (i > 0)
4241 /* Bump the vector pointer. */
4242 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4243 stmt, NULL_TREE);
4244
4245 if (slp)
4246 vec_oprnd = vec_oprnds[i];
4247 else if (grouped_store)
4248 /* For grouped stores vectorized defs are interleaved in
4249 vect_permute_store_chain(). */
4250 vec_oprnd = result_chain[i];
4251
4252 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4253 dataref_offset
4254 ? dataref_offset
4255 : build_int_cst (reference_alias_ptr_type
4256 (DR_REF (first_dr)), 0));
4257 align = TYPE_ALIGN_UNIT (vectype);
4258 if (aligned_access_p (first_dr))
4259 misalign = 0;
4260 else if (DR_MISALIGNMENT (first_dr) == -1)
4261 {
4262 TREE_TYPE (data_ref)
4263 = build_aligned_type (TREE_TYPE (data_ref),
4264 TYPE_ALIGN (elem_type));
4265 align = TYPE_ALIGN_UNIT (elem_type);
4266 misalign = 0;
4267 }
4268 else
4269 {
4270 TREE_TYPE (data_ref)
4271 = build_aligned_type (TREE_TYPE (data_ref),
4272 TYPE_ALIGN (elem_type));
4273 misalign = DR_MISALIGNMENT (first_dr);
4274 }
4275 if (dataref_offset == NULL_TREE)
4276 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4277 misalign);
4278
4279 /* Arguments are ready. Create the new vector stmt. */
4280 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4281 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4282
4283 if (slp)
4284 continue;
4285
4286 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4287 if (!next_stmt)
4288 break;
4289 }
ebfd146a 4290 }
4291 if (!slp)
4292 {
4293 if (j == 0)
4294 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4295 else
4296 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4297 prev_stmt_info = vinfo_for_stmt (new_stmt);
4298 }
4299 }
4300
4301 dr_chain.release ();
4302 oprnds.release ();
4303 result_chain.release ();
4304 vec_oprnds.release ();
4305
4306 return true;
4307}
4308
4309/* Given a vector type VECTYPE and permutation SEL returns
4310 the VECTOR_CST mask that implements the permutation of the
4311 vector elements. If that is impossible to do, returns NULL. */
4312
4313tree
4314vect_gen_perm_mask (tree vectype, unsigned char *sel)
4315 {
4316 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4317 int i, nunits;
4318
4319 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4320
4321 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4322 return NULL;
4323
4324 mask_elt_type = lang_hooks.types.type_for_mode
4325 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4326 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4327
4328 mask_elts = XALLOCAVEC (tree, nunits);
4329 for (i = nunits - 1; i >= 0; i--)
4330 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4331 mask_vec = build_vector (mask_type, mask_elts);
4332
4333 return mask_vec;
4334}
4335
4336/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4337 reversal of the vector elements. If that is impossible to do,
4338 returns NULL. */
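/* Example (added for exposition): for a four-element vector type the
   selector built below is {3, 2, 1, 0}, i.e. element i of the result
   is element nunits - 1 - i of the input.  */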
4339
4340static tree
4341perm_mask_for_reverse (tree vectype)
4342{
4343 int i, nunits;
4344 unsigned char *sel;
4345
4346 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4347 sel = XALLOCAVEC (unsigned char, nunits);
4348
4349 for (i = 0; i < nunits; ++i)
4350 sel[i] = nunits - 1 - i;
4351
4352 return vect_gen_perm_mask (vectype, sel);
4353}
4354
4355/* Given a vector variable X and Y, that was generated for the scalar
4356 STMT, generate instructions to permute the vector elements of X and Y
4357 using permutation mask MASK_VEC, insert them at *GSI and return the
4358 permuted vector variable. */
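/* Added note: the emitted VEC_PERM_EXPR selects from the concatenation
   of X and Y, so with nunits == 4 a mask element of 5 picks Y's second
   element while a mask element of 1 picks X's second element.  */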
4359
4360static tree
4361permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4362 gimple_stmt_iterator *gsi)
4363{
4364 tree vectype = TREE_TYPE (x);
4365 tree perm_dest, data_ref;
4366 gimple perm_stmt;
4367
4368 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4369 data_ref = make_ssa_name (perm_dest, NULL);
4370
4371 /* Generate the permute statement. */
4372 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4373 x, y, mask_vec);
4374 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4375
4376 return data_ref;
4377}
4378
4379/* vectorizable_load.
4380
4381 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4382 can be vectorized.
4383 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4384 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4385 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4386
4387static bool
4388vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4389 slp_tree slp_node, slp_instance slp_node_instance)
4390{
4391 tree scalar_dest;
4392 tree vec_dest = NULL;
4393 tree data_ref = NULL;
4394 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4395 stmt_vec_info prev_stmt_info;
4396 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4397 struct loop *loop = NULL;
4398 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4399 bool nested_in_vect_loop = false;
4400 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4401 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4402 tree elem_type;
4403 tree new_temp;
4404 enum machine_mode mode;
4405 gimple new_stmt = NULL;
4406 tree dummy;
4407 enum dr_alignment_support alignment_support_scheme;
4408 tree dataref_ptr = NULL_TREE;
4409 tree dataref_offset = NULL_TREE;
4410 gimple ptr_incr = NULL;
4411 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4412 int ncopies;
4413 int i, j, group_size, group_gap;
4414 tree msq = NULL_TREE, lsq;
4415 tree offset = NULL_TREE;
4416 tree realignment_token = NULL_TREE;
4417 gimple phi = NULL;
4418 vec<tree> dr_chain = vNULL;
4419 bool grouped_load = false;
4420 bool load_lanes_p = false;
4421 gimple first_stmt;
4422 bool inv_p;
4423 bool negative = false;
4424 bool compute_in_loop = false;
4425 struct loop *at_loop;
4426 int vec_num;
4427 bool slp = (slp_node != NULL);
4428 bool slp_perm = false;
4429 enum tree_code code;
4430 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4431 int vf;
4432 tree aggr_type;
4433 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4434 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4435 int gather_scale = 1;
4436 enum vect_def_type gather_dt = vect_unknown_def_type;
4437
4438 if (loop_vinfo)
4439 {
4440 loop = LOOP_VINFO_LOOP (loop_vinfo);
4441 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4442 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4443 }
4444 else
4445 vf = 1;
4446
4447 /* Multiple types in SLP are handled by creating the appropriate number of
4448 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4449 case of SLP. */
4450 if (slp || PURE_SLP_STMT (stmt_info))
4451 ncopies = 1;
4452 else
4453 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4454
4455 gcc_assert (ncopies >= 1);
4456
4457 /* FORNOW. This restriction should be relaxed. */
4458 if (nested_in_vect_loop && ncopies > 1)
4459 {
4460 if (dump_enabled_p ())
4461 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4462 "multiple types in nested loop.");
4463 return false;
4464 }
4465
4466 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4467 return false;
4468
4469 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4470 return false;
4471
4472 /* Is vectorizable load? */
4473 if (!is_gimple_assign (stmt))
4474 return false;
4475
4476 scalar_dest = gimple_assign_lhs (stmt);
4477 if (TREE_CODE (scalar_dest) != SSA_NAME)
4478 return false;
4479
4480 code = gimple_assign_rhs_code (stmt);
4481 if (code != ARRAY_REF
4482 && code != BIT_FIELD_REF
4483 && code != INDIRECT_REF
4484 && code != COMPONENT_REF
4485 && code != IMAGPART_EXPR
4486 && code != REALPART_EXPR
4487 && code != MEM_REF
4488 && TREE_CODE_CLASS (code) != tcc_declaration)
4489 return false;
4490
4491 if (!STMT_VINFO_DATA_REF (stmt_info))
4492 return false;
4493
4494 elem_type = TREE_TYPE (vectype);
4495 mode = TYPE_MODE (vectype);
4496
4497 /* FORNOW. In some cases can vectorize even if data-type not supported
4498 (e.g. - data copies). */
4499 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4500 {
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4503 "Aligned load, but unsupported type.");
4504 return false;
4505 }
4506
4507 /* Check if the load is a part of an interleaving chain. */
4508 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4509 {
4510 grouped_load = true;
4511 /* FORNOW */
4512 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4513
4514 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4515 if (!slp && !PURE_SLP_STMT (stmt_info))
4516 {
4517 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4518 if (vect_load_lanes_supported (vectype, group_size))
4519 load_lanes_p = true;
4520 else if (!vect_grouped_load_supported (vectype, group_size))
4521 return false;
4522 }
4523 }
4524
4525
4526 if (STMT_VINFO_GATHER_P (stmt_info))
4527 {
4528 gimple def_stmt;
4529 tree def;
4530 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4531 &gather_off, &gather_scale);
4532 gcc_assert (gather_decl);
4533 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4534 &def_stmt, &def, &gather_dt,
4535 &gather_off_vectype))
4536 {
4537 if (dump_enabled_p ())
4538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4539 "gather index use not simple.");
4540 return false;
4541 }
4542 }
4543 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4544 ;
4545 else
4546 {
4547 negative = tree_int_cst_compare (nested_in_vect_loop
4548 ? STMT_VINFO_DR_STEP (stmt_info)
4549 : DR_STEP (dr),
4550 size_zero_node) < 0;
4551 if (negative && ncopies > 1)
4552 {
4553 if (dump_enabled_p ())
4554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4555 "multiple types with negative step.");
4556 return false;
4557 }
4558
4559 if (negative)
4560 {
4561 if (grouped_load)
4562 {
4563 if (dump_enabled_p ())
4564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4565 "negative step for group load not supported");
4566 return false;
4567 }
4568 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4569 if (alignment_support_scheme != dr_aligned
4570 && alignment_support_scheme != dr_unaligned_supported)
4571 {
4572 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4574 "negative step but alignment required.");
4575 return false;
4576 }
4577 if (!perm_mask_for_reverse (vectype))
4578 {
4579 if (dump_enabled_p ())
4580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4581 "negative step and reversing not supported.");
4582 return false;
4583 }
4584 }
4585 }
4586
4587 if (!vec_stmt) /* transformation not required. */
4588 {
4589 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4590 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4591 return true;
4592 }
4593
4594 if (dump_enabled_p ())
4595 dump_printf_loc (MSG_NOTE, vect_location,
4596 "transform load. ncopies = %d", ncopies);
4597
4598 /** Transform. **/
4599
4600 ensure_base_align (stmt_info, dr);
4601
4602 if (STMT_VINFO_GATHER_P (stmt_info))
4603 {
4604 tree vec_oprnd0 = NULL_TREE, op;
4605 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4606 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4607 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4608 edge pe = loop_preheader_edge (loop);
4609 gimple_seq seq;
4610 basic_block new_bb;
4611 enum { NARROW, NONE, WIDEN } modifier;
4612 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4613
4614 if (nunits == gather_off_nunits)
4615 modifier = NONE;
4616 else if (nunits == gather_off_nunits / 2)
4617 {
4618 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4619 modifier = WIDEN;
4620
4621 for (i = 0; i < gather_off_nunits; ++i)
4622 sel[i] = i | nunits;
4623
4624 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4625 gcc_assert (perm_mask != NULL_TREE);
4626 }
4627 else if (nunits == gather_off_nunits * 2)
4628 {
4629 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4630 modifier = NARROW;
4631
4632 for (i = 0; i < nunits; ++i)
4633 sel[i] = i < gather_off_nunits
4634 ? i : i + nunits - gather_off_nunits;
4635
4636 perm_mask = vect_gen_perm_mask (vectype, sel);
4637 gcc_assert (perm_mask != NULL_TREE);
4638 ncopies *= 2;
4639 }
4640 else
4641 gcc_unreachable ();
4642
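/* Worked examples of the selectors above (added for exposition):
   WIDEN, nunits == 4, gather_off_nunits == 8:
     sel = {4, 5, 6, 7, 4, 5, 6, 7} - odd-numbered copies permute the
     upper half of the offset vector into place.
   NARROW, nunits == 8, gather_off_nunits == 4:
     sel = {0, 1, 2, 3, 8, 9, 10, 11} - merges two successive gather
     results into a single vector of VECTYPE.  */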
4643 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4644 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4645 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4646 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4647 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4648 scaletype = TREE_VALUE (arglist);
4649 gcc_checking_assert (types_compatible_p (srctype, rettype)
4650 && types_compatible_p (srctype, masktype));
4651
4652 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4653
4654 ptr = fold_convert (ptrtype, gather_base);
4655 if (!is_gimple_min_invariant (ptr))
4656 {
4657 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4658 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4659 gcc_assert (!new_bb);
4660 }
4661
4662 /* Currently we support only unconditional gather loads,
4663 so mask should be all ones. */
4664 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4665 mask = build_int_cst (TREE_TYPE (masktype), -1);
4666 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4667 {
4668 REAL_VALUE_TYPE r;
4669 long tmp[6];
4670 for (j = 0; j < 6; ++j)
4671 tmp[j] = -1;
4672 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4673 mask = build_real (TREE_TYPE (masktype), r);
4674 }
4675 else
4676 gcc_unreachable ();
4677 mask = build_vector_from_val (masktype, mask);
4678 mask = vect_init_vector (stmt, mask, masktype, NULL);
4679
4680 scale = build_int_cst (scaletype, gather_scale);
4681
4682 prev_stmt_info = NULL;
4683 for (j = 0; j < ncopies; ++j)
4684 {
4685 if (modifier == WIDEN && (j & 1))
4686 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4687 perm_mask, stmt, gsi);
4688 else if (j == 0)
4689 op = vec_oprnd0
4690 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4691 else
4692 op = vec_oprnd0
4693 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4694
4695 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4696 {
4697 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4698 == TYPE_VECTOR_SUBPARTS (idxtype));
4699 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4700 var = make_ssa_name (var, NULL);
4701 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4702 new_stmt
4703 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4704 op, NULL_TREE);
4705 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4706 op = var;
4707 }
4708
4709 new_stmt
4710 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4711
4712 if (!useless_type_conversion_p (vectype, rettype))
4713 {
4714 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4715 == TYPE_VECTOR_SUBPARTS (rettype));
4716 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4717 op = make_ssa_name (var, new_stmt);
4718 gimple_call_set_lhs (new_stmt, op);
4719 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4720 var = make_ssa_name (vec_dest, NULL);
4721 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4722 new_stmt
4723 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4724 NULL_TREE);
4725 }
4726 else
4727 {
4728 var = make_ssa_name (vec_dest, new_stmt);
4729 gimple_call_set_lhs (new_stmt, var);
4730 }
4731
4732 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4733
4734 if (modifier == NARROW)
4735 {
4736 if ((j & 1) == 0)
4737 {
4738 prev_res = var;
4739 continue;
4740 }
4741 var = permute_vec_elements (prev_res, var,
4742 perm_mask, stmt, gsi);
4743 new_stmt = SSA_NAME_DEF_STMT (var);
4744 }
4745
4746 if (prev_stmt_info == NULL)
4747 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4748 else
4749 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4750 prev_stmt_info = vinfo_for_stmt (new_stmt);
4751 }
4752 return true;
4753 }
4754 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4755 {
4756 gimple_stmt_iterator incr_gsi;
4757 bool insert_after;
4758 gimple incr;
4759 tree offvar;
4760 tree ivstep;
4761 tree running_off;
4762 vec<constructor_elt, va_gc> *v = NULL;
4763 gimple_seq stmts = NULL;
4764 tree stride_base, stride_step, alias_off;
4765
4766 gcc_assert (!nested_in_vect_loop);
4767
4768 stride_base
4769 = fold_build_pointer_plus
4770 (unshare_expr (DR_BASE_ADDRESS (dr)),
4771 size_binop (PLUS_EXPR,
4772 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4773 convert_to_ptrofftype (DR_INIT(dr))));
4774 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4775
4776 /* For a load with loop-invariant (but other than power-of-2)
4777 stride (i.e. not a grouped access) like so:
4778
4779 for (i = 0; i < n; i += stride)
4780 ... = array[i];
4781
4782 we generate a new induction variable and new accesses to
4783 form a new vector (or vectors, depending on ncopies):
4784
4785 for (j = 0; ; j += VF*stride)
4786 tmp1 = array[j];
4787 tmp2 = array[j + stride];
4788 ...
4789 vectemp = {tmp1, tmp2, ...}
4790 */
4791
4792 ivstep = stride_step;
4793 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4794 build_int_cst (TREE_TYPE (ivstep), vf));
4795
4796 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4797
4798 create_iv (stride_base, ivstep, NULL,
4799 loop, &incr_gsi, insert_after,
4800 &offvar, NULL);
4801 incr = gsi_stmt (incr_gsi);
4802 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4803
4804 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4805 if (stmts)
4806 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4807
4808 prev_stmt_info = NULL;
4809 running_off = offvar;
4810 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4811 for (j = 0; j < ncopies; j++)
4812 {
4813 tree vec_inv;
4814
4815 vec_alloc (v, nunits);
4816 for (i = 0; i < nunits; i++)
4817 {
4818 tree newref, newoff;
4819 gimple incr;
4820 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4821 running_off, alias_off);
4822
4823 newref = force_gimple_operand_gsi (gsi, newref, true,
4824 NULL_TREE, true,
4825 GSI_SAME_STMT);
4826 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4827 newoff = copy_ssa_name (running_off, NULL);
4828 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4829 running_off, stride_step);
4830 vect_finish_stmt_generation (stmt, incr, gsi);
4831
4832 running_off = newoff;
4833 }
4834
4835 vec_inv = build_constructor (vectype, v);
4836 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4837 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7d75abc8
MM
4838
4839 if (j == 0)
4840 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4841 else
4842 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4843 prev_stmt_info = vinfo_for_stmt (new_stmt);
4844 }
4845 return true;
4846 }
aec7ae7d 4847
0d0293ac 4848 if (grouped_load)
ebfd146a 4849 {
e14c1050 4850 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4851 if (slp
01d8bf07 4852 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
4853 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4854 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4855
ebfd146a 4856 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
4857 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4858 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4859 ??? But we can only do so if there is exactly one
4860 as we have no way to get at the rest. Leave the CSE
4861 opportunity alone.
4862 ??? With the group load eventually participating
4863 in multiple different permutations (having multiple
4864 slp nodes which refer to the same group) the CSE
4865 is even wrong code. See PR56270. */
4866 && !slp)
ebfd146a
IR
4867 {
4868 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4869 return true;
4870 }
4871 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4872 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
4873
4874 /* VEC_NUM is the number of vect stmts to be created for this group. */
4875 if (slp)
4876 {
0d0293ac 4877 grouped_load = false;
ebfd146a 4878 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 4879 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 4880 slp_perm = true;
a64b9c26 4881 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 4882 }
ebfd146a 4883 else
a64b9c26
RB
4884 {
4885 vec_num = group_size;
4886 group_gap = 0;
4887 }
ebfd146a
IR
4888 }
4889 else
4890 {
4891 first_stmt = stmt;
4892 first_dr = dr;
4893 group_size = vec_num = 1;
a64b9c26 4894 group_gap = 0;
ebfd146a
IR
4895 }
4896
720f5239 4897 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4898 gcc_assert (alignment_support_scheme);
272c6793
RS
4899 /* Targets with load-lane instructions must not require explicit
4900 realignment. */
4901 gcc_assert (!load_lanes_p
4902 || alignment_support_scheme == dr_aligned
4903 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
4904
4905 /* In case the vectorization factor (VF) is bigger than the number
4906 of elements that we can fit in a vectype (nunits), we have to generate
4907 more than one vector stmt - i.e. - we need to "unroll" the
ff802fa1 4908 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4909 from one copy of the vector stmt to the next, in the field
ff802fa1 4910 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4911 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
4912 stmts that use the defs of the current stmt. The example below
4913 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4914 need to create 4 vectorized stmts):
ebfd146a
IR
4915
4916 before vectorization:
4917 RELATED_STMT VEC_STMT
4918 S1: x = memref - -
4919 S2: z = x + 1 - -
4920
4921 step 1: vectorize stmt S1:
4922 We first create the vector stmt VS1_0, and, as usual, record a
4923 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4924 Next, we create the vector stmt VS1_1, and record a pointer to
4925 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4926 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
4927 stmts and pointers:
4928 RELATED_STMT VEC_STMT
4929 VS1_0: vx0 = memref0 VS1_1 -
4930 VS1_1: vx1 = memref1 VS1_2 -
4931 VS1_2: vx2 = memref2 VS1_3 -
4932 VS1_3: vx3 = memref3 - -
4933 S1: x = load - VS1_0
4934 S2: z = x + 1 - -
4935
b8698a0f
L
4936 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4937 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
4938 stmt S2. */
4939
0d0293ac 4940 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4941
4942 S1: x2 = &base + 2
4943 S2: x0 = &base
4944 S3: x1 = &base + 1
4945 S4: x3 = &base + 3
4946
b8698a0f 4947 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4948 starting from the access of the first stmt of the chain:
4949
4950 VS1: vx0 = &base
4951 VS2: vx1 = &base + vec_size*1
4952 VS3: vx3 = &base + vec_size*2
4953 VS4: vx4 = &base + vec_size*3
4954
4955 Then permutation statements are generated:
4956
e2c83630
RH
4957 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4958 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
4959 ...
4960
4961 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4962 (the order of the data-refs in the output of vect_permute_load_chain
4963 corresponds to the order of scalar stmts in the interleaving chain - see
4964 the documentation of vect_permute_load_chain()).
4965 The generation of permutation stmts and recording them in
0d0293ac 4966 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4967
b8698a0f 4968 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
4969 permutation stmts above are created for every copy. The result vector
4970 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4971 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
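/* Illustrative aside (not part of tree-vect-stmts.c): the even/odd
   extraction shown in VS5/VS6 above, written with GCC's __builtin_shuffle,
   which expands to exactly such VEC_PERM_EXPRs; assumes 4-element int
   vectors, and 'deinterleave_two_chains' is a hypothetical helper name.  */
typedef int v4si __attribute__ ((vector_size (16)));

static void
deinterleave_two_chains (v4si vx0, v4si vx1, v4si *even, v4si *odd)
{
  *even = __builtin_shuffle (vx0, vx1, (v4si) { 0, 2, 4, 6 }); /* { 0, 2, ..., i*2 }   */
  *odd  = __builtin_shuffle (vx0, vx1, (v4si) { 1, 3, 5, 7 }); /* { 1, 3, ..., i*2+1 } */
}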
ebfd146a
IR
4972
4973 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4974 on a target that supports unaligned accesses (dr_unaligned_supported)
4975 we generate the following code:
4976 p = initial_addr;
4977 indx = 0;
4978 loop {
4979 p = p + indx * vectype_size;
4980 vec_dest = *(p);
4981 indx = indx + 1;
4982 }
4983
4984 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4985 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
4986 then generate the following code, in which the data in each iteration is
4987 obtained by two vector loads, one from the previous iteration, and one
4988 from the current iteration:
4989 p1 = initial_addr;
4990 msq_init = *(floor(p1))
4991 p2 = initial_addr + VS - 1;
4992 realignment_token = call target_builtin;
4993 indx = 0;
4994 loop {
4995 p2 = p2 + indx * vectype_size
4996 lsq = *(floor(p2))
4997 vec_dest = realign_load (msq, lsq, realignment_token)
4998 indx = indx + 1;
4999 msq = lsq;
5000 } */
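/* Illustrative aside (not part of tree-vect-stmts.c): a sketch of the
   dr_explicit_realign scheme above for 16-byte vectors of int, assuming P
   is at least element-aligned.  The two-source __builtin_shuffle plays the
   role of realign_load, and 'explicit_realign_load' is a hypothetical
   name.  */
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
explicit_realign_load (const int *p)
{
  const v4si *msq_p = (const v4si *) ((uintptr_t) p & ~(uintptr_t) 15);       /* floor (p1) */
  const v4si *lsq_p = (const v4si *) ((uintptr_t) (p + 3) & ~(uintptr_t) 15); /* floor (p2) */
  v4si msq = *msq_p;                            /* msq = *(floor (p1)) */
  v4si lsq = *lsq_p;                            /* lsq = *(floor (p2)) */
  int shift = (int) (((uintptr_t) p & 15) / sizeof (int));
  v4si token = { shift, shift + 1, shift + 2, shift + 3 };
  return __builtin_shuffle (msq, lsq, token);   /* realign_load (msq, lsq, token) */
}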
5001
5002 /* If the misalignment remains the same throughout the execution of the
5003 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 5004 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
5005 This can only occur when vectorizing memory accesses in the inner-loop
5006 nested within an outer-loop that is being vectorized. */
5007
d1e4b493 5008 if (nested_in_vect_loop
211bea38 5009 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
5010 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5011 {
5012 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5013 compute_in_loop = true;
5014 }
5015
5016 if ((alignment_support_scheme == dr_explicit_realign_optimized
5017 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 5018 && !compute_in_loop)
ebfd146a
IR
5019 {
5020 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5021 alignment_support_scheme, NULL_TREE,
5022 &at_loop);
5023 if (alignment_support_scheme == dr_explicit_realign_optimized)
5024 {
5025 phi = SSA_NAME_DEF_STMT (msq);
5026 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5027 }
5028 }
5029 else
5030 at_loop = loop;
5031
a1e53f3f
L
5032 if (negative)
5033 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5034
272c6793
RS
5035 if (load_lanes_p)
5036 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5037 else
5038 aggr_type = vectype;
5039
ebfd146a
IR
5040 prev_stmt_info = NULL;
5041 for (j = 0; j < ncopies; j++)
b8698a0f 5042 {
272c6793 5043 /* 1. Create the vector or array pointer update chain. */
ebfd146a 5044 if (j == 0)
74bf76ed
JJ
5045 {
5046 bool simd_lane_access_p
5047 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5048 if (simd_lane_access_p
5049 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5050 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5051 && integer_zerop (DR_OFFSET (first_dr))
5052 && integer_zerop (DR_INIT (first_dr))
5053 && alias_sets_conflict_p (get_alias_set (aggr_type),
5054 get_alias_set (DR_REF (first_dr)))
5055 && (alignment_support_scheme == dr_aligned
5056 || alignment_support_scheme == dr_unaligned_supported))
5057 {
5058 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5059 dataref_offset = build_int_cst (reference_alias_ptr_type
5060 (DR_REF (first_dr)), 0);
5061 }
5062 else
5063 dataref_ptr
5064 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5065 offset, &dummy, gsi, &ptr_incr,
5066 simd_lane_access_p, &inv_p);
5067 }
5068 else if (dataref_offset)
5069 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5070 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5071 else
272c6793
RS
5072 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5073 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5074
0d0293ac 5075 if (grouped_load || slp_perm)
9771b263 5076 dr_chain.create (vec_num);
5ce1ee7f 5077
272c6793 5078 if (load_lanes_p)
ebfd146a 5079 {
272c6793
RS
5080 tree vec_array;
5081
5082 vec_array = create_vector_array (vectype, vec_num);
5083
5084 /* Emit:
5085 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5086 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5087 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5088 gimple_call_set_lhs (new_stmt, vec_array);
5089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 5090
272c6793
RS
5091 /* Extract each vector into an SSA_NAME. */
5092 for (i = 0; i < vec_num; i++)
ebfd146a 5093 {
272c6793
RS
5094 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5095 vec_array, i);
9771b263 5096 dr_chain.quick_push (new_temp);
272c6793
RS
5097 }
5098
5099 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 5100 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
5101 }
5102 else
5103 {
5104 for (i = 0; i < vec_num; i++)
5105 {
5106 if (i > 0)
5107 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5108 stmt, NULL_TREE);
5109
5110 /* 2. Create the vector-load in the loop. */
5111 switch (alignment_support_scheme)
5112 {
5113 case dr_aligned:
5114 case dr_unaligned_supported:
be1ac4ec 5115 {
644ffefd
MJ
5116 unsigned int align, misalign;
5117
272c6793
RS
5118 data_ref
5119 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
5120 dataref_offset
5121 ? dataref_offset
5122 : build_int_cst (reference_alias_ptr_type
5123 (DR_REF (first_dr)), 0));
644ffefd 5124 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
5125 if (alignment_support_scheme == dr_aligned)
5126 {
5127 gcc_assert (aligned_access_p (first_dr));
644ffefd 5128 misalign = 0;
272c6793
RS
5129 }
5130 else if (DR_MISALIGNMENT (first_dr) == -1)
5131 {
5132 TREE_TYPE (data_ref)
5133 = build_aligned_type (TREE_TYPE (data_ref),
5134 TYPE_ALIGN (elem_type));
644ffefd
MJ
5135 align = TYPE_ALIGN_UNIT (elem_type);
5136 misalign = 0;
272c6793
RS
5137 }
5138 else
5139 {
5140 TREE_TYPE (data_ref)
5141 = build_aligned_type (TREE_TYPE (data_ref),
5142 TYPE_ALIGN (elem_type));
644ffefd 5143 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5144 }
74bf76ed
JJ
5145 if (dataref_offset == NULL_TREE)
5146 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5147 align, misalign);
272c6793 5148 break;
be1ac4ec 5149 }
272c6793 5150 case dr_explicit_realign:
267d3070 5151 {
272c6793
RS
5152 tree ptr, bump;
5153 tree vs_minus_1;
5154
5155 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5156
5157 if (compute_in_loop)
5158 msq = vect_setup_realignment (first_stmt, gsi,
5159 &realignment_token,
5160 dr_explicit_realign,
5161 dataref_ptr, NULL);
5162
070ecdfd 5163 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5164 new_stmt = gimple_build_assign_with_ops
070ecdfd 5165 (BIT_AND_EXPR, ptr, dataref_ptr,
272c6793
RS
5166 build_int_cst
5167 (TREE_TYPE (dataref_ptr),
5168 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5170 data_ref
5171 = build2 (MEM_REF, vectype, ptr,
5172 build_int_cst (reference_alias_ptr_type
5173 (DR_REF (first_dr)), 0));
5174 vec_dest = vect_create_destination_var (scalar_dest,
5175 vectype);
5176 new_stmt = gimple_build_assign (vec_dest, data_ref);
5177 new_temp = make_ssa_name (vec_dest, new_stmt);
5178 gimple_assign_set_lhs (new_stmt, new_temp);
5179 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5180 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5181 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5182 msq = new_temp;
5183
5184 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5185 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
5186 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5187 new_stmt = gimple_build_assign_with_ops
5188 (BIT_AND_EXPR, NULL_TREE, ptr,
5189 build_int_cst
5190 (TREE_TYPE (ptr),
5191 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5192 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
5193 gimple_assign_set_lhs (new_stmt, ptr);
5194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5195 data_ref
5196 = build2 (MEM_REF, vectype, ptr,
5197 build_int_cst (reference_alias_ptr_type
5198 (DR_REF (first_dr)), 0));
5199 break;
267d3070 5200 }
272c6793 5201 case dr_explicit_realign_optimized:
070ecdfd 5202 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5203 new_stmt = gimple_build_assign_with_ops
070ecdfd 5204 (BIT_AND_EXPR, new_temp, dataref_ptr,
272c6793
RS
5205 build_int_cst
5206 (TREE_TYPE (dataref_ptr),
5207 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5209 data_ref
5210 = build2 (MEM_REF, vectype, new_temp,
5211 build_int_cst (reference_alias_ptr_type
5212 (DR_REF (first_dr)), 0));
5213 break;
5214 default:
5215 gcc_unreachable ();
5216 }
ebfd146a 5217 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5218 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
5219 new_temp = make_ssa_name (vec_dest, new_stmt);
5220 gimple_assign_set_lhs (new_stmt, new_temp);
5221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5222
272c6793
RS
5223 /* 3. Handle explicit realignment if necessary/supported.
5224 Create in loop:
5225 vec_dest = realign_load (msq, lsq, realignment_token) */
5226 if (alignment_support_scheme == dr_explicit_realign_optimized
5227 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5228 {
272c6793
RS
5229 lsq = gimple_assign_lhs (new_stmt);
5230 if (!realignment_token)
5231 realignment_token = dataref_ptr;
5232 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5233 new_stmt
73804b12
RG
5234 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5235 vec_dest, msq, lsq,
5236 realignment_token);
272c6793
RS
5237 new_temp = make_ssa_name (vec_dest, new_stmt);
5238 gimple_assign_set_lhs (new_stmt, new_temp);
5239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5240
5241 if (alignment_support_scheme == dr_explicit_realign_optimized)
5242 {
5243 gcc_assert (phi);
5244 if (i == vec_num - 1 && j == ncopies - 1)
5245 add_phi_arg (phi, lsq,
5246 loop_latch_edge (containing_loop),
9e227d60 5247 UNKNOWN_LOCATION);
272c6793
RS
5248 msq = lsq;
5249 }
ebfd146a 5250 }
ebfd146a 5251
59fd17e3
RB
5252 /* 4. Handle invariant-load. */
5253 if (inv_p && !bb_vinfo)
5254 {
5255 gimple_stmt_iterator gsi2 = *gsi;
5256 gcc_assert (!grouped_load);
5257 gsi_next (&gsi2);
5258 new_temp = vect_init_vector (stmt, scalar_dest,
5259 vectype, &gsi2);
5260 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5261 }
5262
272c6793
RS
5263 if (negative)
5264 {
aec7ae7d
JJ
5265 tree perm_mask = perm_mask_for_reverse (vectype);
5266 new_temp = permute_vec_elements (new_temp, new_temp,
5267 perm_mask, stmt, gsi);
ebfd146a
IR
5268 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5269 }
267d3070 5270
272c6793 5271 /* Collect vector loads and later create their permutation in
0d0293ac
MM
5272 vect_transform_grouped_load (). */
5273 if (grouped_load || slp_perm)
9771b263 5274 dr_chain.quick_push (new_temp);
267d3070 5275
272c6793
RS
5276 /* Store vector loads in the corresponding SLP_NODE. */
5277 if (slp && !slp_perm)
9771b263 5278 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5279 }
a64b9c26
RB
5280 /* Bump the vector pointer to account for a gap. */
5281 if (slp && group_gap != 0)
5282 {
5283 tree bump = size_binop (MULT_EXPR,
5284 TYPE_SIZE_UNIT (elem_type),
5285 size_int (group_gap));
5286 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5287 stmt, bump);
5288 }
ebfd146a
IR
5289 }
5290
5291 if (slp && !slp_perm)
5292 continue;
5293
5294 if (slp_perm)
5295 {
01d8bf07 5296 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
ebfd146a
IR
5297 slp_node_instance, false))
5298 {
9771b263 5299 dr_chain.release ();
ebfd146a
IR
5300 return false;
5301 }
5302 }
5303 else
5304 {
0d0293ac 5305 if (grouped_load)
ebfd146a 5306 {
272c6793 5307 if (!load_lanes_p)
0d0293ac 5308 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5309 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5310 }
5311 else
5312 {
5313 if (j == 0)
5314 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5315 else
5316 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5317 prev_stmt_info = vinfo_for_stmt (new_stmt);
5318 }
5319 }
9771b263 5320 dr_chain.release ();
ebfd146a
IR
5321 }
5322
ebfd146a
IR
5323 return true;
5324}
5325
5326/* Function vect_is_simple_cond.
b8698a0f 5327
ebfd146a
IR
5328 Input:
5329 COND - the condition that is checked for simple use.
5330 STMT - the statement that contains COND.
5331
e9e1d143
RG
5332 Output:
5333 *COMP_VECTYPE - the vector type for the comparison.
5334
ebfd146a
IR
5335 Returns whether a COND can be vectorized. Checks whether
5336 condition operands are supportable using vect_is_simple_use. */
5337
87aab9b2 5338static bool
24ee1384
IR
5339vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5340 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
5341{
5342 tree lhs, rhs;
5343 tree def;
5344 enum vect_def_type dt;
e9e1d143 5345 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
5346
5347 if (!COMPARISON_CLASS_P (cond))
5348 return false;
5349
5350 lhs = TREE_OPERAND (cond, 0);
5351 rhs = TREE_OPERAND (cond, 1);
5352
5353 if (TREE_CODE (lhs) == SSA_NAME)
5354 {
5355 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
5356 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5357 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
5358 return false;
5359 }
5360 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5361 && TREE_CODE (lhs) != FIXED_CST)
5362 return false;
5363
5364 if (TREE_CODE (rhs) == SSA_NAME)
5365 {
5366 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
5367 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5368 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
5369 return false;
5370 }
f7e531cf 5371 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
5372 && TREE_CODE (rhs) != FIXED_CST)
5373 return false;
5374
e9e1d143 5375 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
5376 return true;
5377}
5378
5379/* vectorizable_condition.
5380
b8698a0f
L
5381 Check if STMT is a conditional modify expression that can be vectorized.
5382 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5383 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
5384 at GSI.
5385
5386 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5387 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5388 else clause if it is 2).
ebfd146a
IR
5389
5390 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
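/* Illustrative aside (not part of tree-vect-stmts.c): for the scalar
   statement  x = a < b ? c : d;  the transform below builds a vector
   comparison feeding a VEC_COND_EXPR.  The sketch emulates that with GCC's
   vector extension, where 'a < b' on vectors yields per-lane
   all-ones/all-zeros masks; 'vec_cond_sketch' is a hypothetical name.  */
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
vec_cond_sketch (v4si a, v4si b, v4si c, v4si d)
{
  v4si vec_compare = a < b;                      /* -1 where true, 0 where false */
  return (c & vec_compare) | (d & ~vec_compare); /* VEC_COND_EXPR <cmp, c, d>    */
}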
5391
4bbe8262 5392bool
ebfd146a 5393vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
5394 gimple *vec_stmt, tree reduc_def, int reduc_index,
5395 slp_tree slp_node)
ebfd146a
IR
5396{
5397 tree scalar_dest = NULL_TREE;
5398 tree vec_dest = NULL_TREE;
ebfd146a
IR
5399 tree cond_expr, then_clause, else_clause;
5400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5401 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5402 tree comp_vectype = NULL_TREE;
ff802fa1
IR
5403 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5404 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
5405 tree vec_compare, vec_cond_expr;
5406 tree new_temp;
5407 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5408 tree def;
a855b1b1 5409 enum vect_def_type dt, dts[4];
ebfd146a 5410 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5411 int ncopies;
ebfd146a 5412 enum tree_code code;
a855b1b1 5413 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
5414 int i, j;
5415 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
5416 vec<tree> vec_oprnds0 = vNULL;
5417 vec<tree> vec_oprnds1 = vNULL;
5418 vec<tree> vec_oprnds2 = vNULL;
5419 vec<tree> vec_oprnds3 = vNULL;
74946978 5420 tree vec_cmp_type;
b8698a0f 5421
f7e531cf
IR
5422 if (slp_node || PURE_SLP_STMT (stmt_info))
5423 ncopies = 1;
5424 else
5425 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5426
ebfd146a 5427 gcc_assert (ncopies >= 1);
a855b1b1 5428 if (reduc_index && ncopies > 1)
ebfd146a
IR
5429 return false; /* FORNOW */
5430
f7e531cf
IR
5431 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5432 return false;
5433
5434 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5435 return false;
5436
4bbe8262
IR
5437 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5438 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5439 && reduc_def))
ebfd146a
IR
5440 return false;
5441
ebfd146a 5442 /* FORNOW: not yet supported. */
b8698a0f 5443 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5444 {
73fbfcad 5445 if (dump_enabled_p ())
78c60e3d
SS
5446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5447 "value used after loop.");
ebfd146a
IR
5448 return false;
5449 }
5450
5451 /* Is vectorizable conditional operation? */
5452 if (!is_gimple_assign (stmt))
5453 return false;
5454
5455 code = gimple_assign_rhs_code (stmt);
5456
5457 if (code != COND_EXPR)
5458 return false;
5459
4e71066d
RG
5460 cond_expr = gimple_assign_rhs1 (stmt);
5461 then_clause = gimple_assign_rhs2 (stmt);
5462 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5463
24ee1384
IR
5464 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5465 &comp_vectype)
e9e1d143 5466 || !comp_vectype)
ebfd146a
IR
5467 return false;
5468
5469 if (TREE_CODE (then_clause) == SSA_NAME)
5470 {
5471 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5472 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5473 &then_def_stmt, &def, &dt))
5474 return false;
5475 }
b8698a0f 5476 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
5477 && TREE_CODE (then_clause) != REAL_CST
5478 && TREE_CODE (then_clause) != FIXED_CST)
5479 return false;
5480
5481 if (TREE_CODE (else_clause) == SSA_NAME)
5482 {
5483 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5484 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5485 &else_def_stmt, &def, &dt))
5486 return false;
5487 }
b8698a0f 5488 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
5489 && TREE_CODE (else_clause) != REAL_CST
5490 && TREE_CODE (else_clause) != FIXED_CST)
5491 return false;
5492
74946978
MP
5493 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5494 /* The result of a vector comparison should be of signed integer type. */
5495 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5496 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5497 if (vec_cmp_type == NULL_TREE)
5498 return false;
784fb9b3 5499
b8698a0f 5500 if (!vec_stmt)
ebfd146a
IR
5501 {
5502 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5503 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5504 }
5505
f7e531cf
IR
5506 /* Transform. */
5507
5508 if (!slp_node)
5509 {
9771b263
DN
5510 vec_oprnds0.create (1);
5511 vec_oprnds1.create (1);
5512 vec_oprnds2.create (1);
5513 vec_oprnds3.create (1);
f7e531cf 5514 }
ebfd146a
IR
5515
5516 /* Handle def. */
5517 scalar_dest = gimple_assign_lhs (stmt);
5518 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5519
5520 /* Handle cond expr. */
a855b1b1
MM
5521 for (j = 0; j < ncopies; j++)
5522 {
f7e531cf 5523 gimple new_stmt = NULL;
a855b1b1
MM
5524 if (j == 0)
5525 {
f7e531cf
IR
5526 if (slp_node)
5527 {
9771b263
DN
5528 vec<tree> ops;
5529 ops.create (4);
37b5ec8f 5530 vec<vec<tree> > vec_defs;
9771b263
DN
5531
5532 vec_defs.create (4);
5533 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5534 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5535 ops.safe_push (then_clause);
5536 ops.safe_push (else_clause);
f7e531cf 5537 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
37b5ec8f
JJ
5538 vec_oprnds3 = vec_defs.pop ();
5539 vec_oprnds2 = vec_defs.pop ();
5540 vec_oprnds1 = vec_defs.pop ();
5541 vec_oprnds0 = vec_defs.pop ();
f7e531cf 5542
9771b263
DN
5543 ops.release ();
5544 vec_defs.release ();
f7e531cf
IR
5545 }
5546 else
5547 {
5548 gimple gtemp;
5549 vec_cond_lhs =
a855b1b1
MM
5550 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5551 stmt, NULL);
24ee1384
IR
5552 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5553 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
5554
5555 vec_cond_rhs =
5556 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5557 stmt, NULL);
24ee1384
IR
5558 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5559 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
5560 if (reduc_index == 1)
5561 vec_then_clause = reduc_def;
5562 else
5563 {
5564 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5565 stmt, NULL);
24ee1384 5566 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
5567 NULL, &gtemp, &def, &dts[2]);
5568 }
5569 if (reduc_index == 2)
5570 vec_else_clause = reduc_def;
5571 else
5572 {
5573 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5574 stmt, NULL);
24ee1384 5575 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5576 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5577 }
a855b1b1
MM
5578 }
5579 }
5580 else
5581 {
f7e531cf 5582 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5583 vec_oprnds0.pop ());
f7e531cf 5584 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5585 vec_oprnds1.pop ());
a855b1b1 5586 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5587 vec_oprnds2.pop ());
a855b1b1 5588 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5589 vec_oprnds3.pop ());
f7e531cf
IR
5590 }
5591
5592 if (!slp_node)
5593 {
9771b263
DN
5594 vec_oprnds0.quick_push (vec_cond_lhs);
5595 vec_oprnds1.quick_push (vec_cond_rhs);
5596 vec_oprnds2.quick_push (vec_then_clause);
5597 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
5598 }
5599
9dc3f7de 5600 /* Arguments are ready. Create the new vector stmt. */
9771b263 5601 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5602 {
9771b263
DN
5603 vec_cond_rhs = vec_oprnds1[i];
5604 vec_then_clause = vec_oprnds2[i];
5605 vec_else_clause = vec_oprnds3[i];
a855b1b1 5606
784fb9b3
JJ
5607 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5608 vec_cond_lhs, vec_cond_rhs);
f7e531cf
IR
5609 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5610 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5611
f7e531cf
IR
5612 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5613 new_temp = make_ssa_name (vec_dest, new_stmt);
5614 gimple_assign_set_lhs (new_stmt, new_temp);
5615 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5616 if (slp_node)
9771b263 5617 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
5618 }
5619
5620 if (slp_node)
5621 continue;
5622
5623 if (j == 0)
5624 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5625 else
5626 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5627
5628 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5629 }
b8698a0f 5630
9771b263
DN
5631 vec_oprnds0.release ();
5632 vec_oprnds1.release ();
5633 vec_oprnds2.release ();
5634 vec_oprnds3.release ();
f7e531cf 5635
ebfd146a
IR
5636 return true;
5637}
5638
5639
8644a673 5640/* Make sure the statement is vectorizable. */
ebfd146a
IR
5641
5642bool
a70d6342 5643vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5644{
8644a673 5645 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5646 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5647 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5648 bool ok;
a70d6342 5649 tree scalar_type, vectype;
363477c0
JJ
5650 gimple pattern_stmt;
5651 gimple_seq pattern_def_seq;
ebfd146a 5652
73fbfcad 5653 if (dump_enabled_p ())
ebfd146a 5654 {
78c60e3d
SS
5655 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 5657 }
ebfd146a 5658
1825a1f3 5659 if (gimple_has_volatile_ops (stmt))
b8698a0f 5660 {
73fbfcad 5661 if (dump_enabled_p ())
78c60e3d
SS
5662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5663 "not vectorized: stmt has volatile operands");
1825a1f3
IR
5664
5665 return false;
5666 }
b8698a0f
L
5667
5668 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
5669 to include:
5670 - the COND_EXPR which is the loop exit condition
5671 - any LABEL_EXPRs in the loop
b8698a0f 5672 - computations that are used only for array indexing or loop control.
8644a673 5673 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5674 instance, therefore, all the statements are relevant.
ebfd146a 5675
d092494c 5676 Pattern statement needs to be analyzed instead of the original statement
83197f37 5677 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
5678 statements. In basic blocks we are called from some SLP instance
5679 traversal, don't analyze pattern stmts instead, the pattern stmts
5680 already will be part of SLP instance. */
83197f37
IR
5681
5682 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5683 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5684 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5685 {
9d5e7640 5686 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5687 && pattern_stmt
9d5e7640
IR
5688 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5689 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5690 {
83197f37 5691 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
5692 stmt = pattern_stmt;
5693 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5694 if (dump_enabled_p ())
9d5e7640 5695 {
78c60e3d
SS
5696 dump_printf_loc (MSG_NOTE, vect_location,
5697 "==> examining pattern statement: ");
5698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
5699 }
5700 }
5701 else
5702 {
73fbfcad 5703 if (dump_enabled_p ())
78c60e3d 5704 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
ebfd146a 5705
9d5e7640
IR
5706 return true;
5707 }
8644a673 5708 }
83197f37 5709 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5710 && node == NULL
83197f37
IR
5711 && pattern_stmt
5712 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5713 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5714 {
5715 /* Analyze PATTERN_STMT too. */
73fbfcad 5716 if (dump_enabled_p ())
83197f37 5717 {
78c60e3d
SS
5718 dump_printf_loc (MSG_NOTE, vect_location,
5719 "==> examining pattern statement: ");
5720 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
5721 }
5722
5723 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5724 return false;
5725 }
ebfd146a 5726
1107f3ae 5727 if (is_pattern_stmt_p (stmt_info)
079c527f 5728 && node == NULL
363477c0 5729 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5730 {
363477c0 5731 gimple_stmt_iterator si;
1107f3ae 5732
363477c0
JJ
5733 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5734 {
5735 gimple pattern_def_stmt = gsi_stmt (si);
5736 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5737 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5738 {
5739 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5740 if (dump_enabled_p ())
363477c0 5741 {
78c60e3d
SS
5742 dump_printf_loc (MSG_NOTE, vect_location,
5743 "==> examining pattern def statement: ");
5744 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 5745 }
1107f3ae 5746
363477c0
JJ
5747 if (!vect_analyze_stmt (pattern_def_stmt,
5748 need_to_vectorize, node))
5749 return false;
5750 }
5751 }
5752 }
1107f3ae 5753
8644a673
IR
5754 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5755 {
5756 case vect_internal_def:
5757 break;
ebfd146a 5758
8644a673 5759 case vect_reduction_def:
7c5222ff 5760 case vect_nested_cycle:
a70d6342 5761 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5762 || relevance == vect_used_in_outer_by_reduction
a70d6342 5763 || relevance == vect_unused_in_scope));
8644a673
IR
5764 break;
5765
5766 case vect_induction_def:
5767 case vect_constant_def:
5768 case vect_external_def:
5769 case vect_unknown_def_type:
5770 default:
5771 gcc_unreachable ();
5772 }
ebfd146a 5773
a70d6342
IR
5774 if (bb_vinfo)
5775 {
5776 gcc_assert (PURE_SLP_STMT (stmt_info));
5777
b690cc0f 5778 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5779 if (dump_enabled_p ())
a70d6342 5780 {
78c60e3d
SS
5781 dump_printf_loc (MSG_NOTE, vect_location,
5782 "get vectype for scalar type: ");
5783 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
a70d6342
IR
5784 }
5785
5786 vectype = get_vectype_for_scalar_type (scalar_type);
5787 if (!vectype)
5788 {
73fbfcad 5789 if (dump_enabled_p ())
a70d6342 5790 {
78c60e3d
SS
5791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5792 "not SLPed: unsupported data-type ");
5793 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5794 scalar_type);
a70d6342
IR
5795 }
5796 return false;
5797 }
5798
73fbfcad 5799 if (dump_enabled_p ())
a70d6342 5800 {
78c60e3d
SS
5801 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5802 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
a70d6342
IR
5803 }
5804
5805 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5806 }
5807
8644a673 5808 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5809 {
8644a673
IR
5810 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5811 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5812 *need_to_vectorize = true;
ebfd146a
IR
5813 }
5814
8644a673 5815 ok = true;
b8698a0f 5816 if (!bb_vinfo
a70d6342
IR
5817 && (STMT_VINFO_RELEVANT_P (stmt_info)
5818 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5819 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5820 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
5821 || vectorizable_operation (stmt, NULL, NULL, NULL)
5822 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5823 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5824 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5825 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5826 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5827 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
5828 else
5829 {
5830 if (bb_vinfo)
4a00c761
JJ
5831 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5832 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5833 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
5834 || vectorizable_assignment (stmt, NULL, NULL, node)
5835 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5836 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
5837 || vectorizable_store (stmt, NULL, NULL, node)
5838 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5839 }
8644a673
IR
5840
5841 if (!ok)
ebfd146a 5842 {
73fbfcad 5843 if (dump_enabled_p ())
8644a673 5844 {
78c60e3d
SS
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "not vectorized: relevant stmt not ");
5847 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5848 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5849 }
b8698a0f 5850
ebfd146a
IR
5851 return false;
5852 }
5853
a70d6342
IR
5854 if (bb_vinfo)
5855 return true;
5856
8644a673
IR
5857 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5858 need extra handling, except for vectorizable reductions. */
5859 if (STMT_VINFO_LIVE_P (stmt_info)
5860 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5861 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5862
8644a673 5863 if (!ok)
ebfd146a 5864 {
73fbfcad 5865 if (dump_enabled_p ())
8644a673 5866 {
78c60e3d
SS
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5868 "not vectorized: live stmt not ");
5869 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5870 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5871 }
b8698a0f 5872
8644a673 5873 return false;
ebfd146a
IR
5874 }
5875
ebfd146a
IR
5876 return true;
5877}
5878
5879
5880/* Function vect_transform_stmt.
5881
5882 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5883
5884bool
5885vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5886 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
5887 slp_instance slp_node_instance)
5888{
5889 bool is_store = false;
5890 gimple vec_stmt = NULL;
5891 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5892 bool done;
ebfd146a
IR
5893
5894 switch (STMT_VINFO_TYPE (stmt_info))
5895 {
5896 case type_demotion_vec_info_type:
ebfd146a 5897 case type_promotion_vec_info_type:
ebfd146a
IR
5898 case type_conversion_vec_info_type:
5899 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5900 gcc_assert (done);
5901 break;
5902
5903 case induc_vec_info_type:
5904 gcc_assert (!slp_node);
5905 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5906 gcc_assert (done);
5907 break;
5908
9dc3f7de
IR
5909 case shift_vec_info_type:
5910 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5911 gcc_assert (done);
5912 break;
5913
ebfd146a
IR
5914 case op_vec_info_type:
5915 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5916 gcc_assert (done);
5917 break;
5918
5919 case assignment_vec_info_type:
5920 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5921 gcc_assert (done);
5922 break;
5923
5924 case load_vec_info_type:
b8698a0f 5925 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5926 slp_node_instance);
5927 gcc_assert (done);
5928 break;
5929
5930 case store_vec_info_type:
5931 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5932 gcc_assert (done);
0d0293ac 5933 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
5934 {
5935 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5936 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
5937 one are skipped, and their vec_stmt_info shouldn't be freed
5938 meanwhile. */
0d0293ac 5939 *grouped_store = true;
ebfd146a
IR
5940 if (STMT_VINFO_VEC_STMT (stmt_info))
5941 is_store = true;
5942 }
5943 else
5944 is_store = true;
5945 break;
5946
5947 case condition_vec_info_type:
f7e531cf 5948 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
5949 gcc_assert (done);
5950 break;
5951
5952 case call_vec_info_type:
190c2236 5953 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5954 stmt = gsi_stmt (*gsi);
ebfd146a
IR
5955 break;
5956
5957 case reduc_vec_info_type:
b5aeb3bb 5958 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
5959 gcc_assert (done);
5960 break;
5961
5962 default:
5963 if (!STMT_VINFO_LIVE_P (stmt_info))
5964 {
73fbfcad 5965 if (dump_enabled_p ())
78c60e3d
SS
5966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5967 "stmt not supported.");
ebfd146a
IR
5968 gcc_unreachable ();
5969 }
5970 }
5971
5972 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5973 is being vectorized, but outside the immediately enclosing loop. */
5974 if (vec_stmt
a70d6342
IR
5975 && STMT_VINFO_LOOP_VINFO (stmt_info)
5976 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5977 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
5978 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5979 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5980 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5981 vect_used_in_outer_by_reduction))
ebfd146a 5982 {
a70d6342
IR
5983 struct loop *innerloop = LOOP_VINFO_LOOP (
5984 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
5985 imm_use_iterator imm_iter;
5986 use_operand_p use_p;
5987 tree scalar_dest;
5988 gimple exit_phi;
5989
73fbfcad 5990 if (dump_enabled_p ())
78c60e3d
SS
5991 dump_printf_loc (MSG_NOTE, vect_location,
5992 "Record the vdef for outer-loop vectorization.");
ebfd146a
IR
5993
5994 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5995 (to be used when vectorizing outer-loop stmts that use the DEF of
5996 STMT). */
5997 if (gimple_code (stmt) == GIMPLE_PHI)
5998 scalar_dest = PHI_RESULT (stmt);
5999 else
6000 scalar_dest = gimple_assign_lhs (stmt);
6001
6002 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6003 {
6004 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6005 {
6006 exit_phi = USE_STMT (use_p);
6007 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6008 }
6009 }
6010 }
6011
6012 /* Handle stmts whose DEF is used outside the loop-nest that is
6013 being vectorized. */
6014 if (STMT_VINFO_LIVE_P (stmt_info)
6015 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6016 {
6017 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6018 gcc_assert (done);
6019 }
6020
6021 if (vec_stmt)
83197f37 6022 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 6023
b8698a0f 6024 return is_store;
ebfd146a
IR
6025}
6026
6027
b8698a0f 6028/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
6029 stmt_vec_info. */
6030
6031void
6032vect_remove_stores (gimple first_stmt)
6033{
6034 gimple next = first_stmt;
6035 gimple tmp;
6036 gimple_stmt_iterator next_si;
6037
6038 while (next)
6039 {
78048b1c
JJ
6040 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6041
6042 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6043 if (is_pattern_stmt_p (stmt_info))
6044 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
6045 /* Free the attached stmt_vec_info and remove the stmt. */
6046 next_si = gsi_for_stmt (next);
3d3f2249 6047 unlink_stmt_vdef (next);
ebfd146a 6048 gsi_remove (&next_si, true);
3d3f2249 6049 release_defs (next);
ebfd146a
IR
6050 free_stmt_vec_info (next);
6051 next = tmp;
6052 }
6053}
6054
6055
6056/* Function new_stmt_vec_info.
6057
6058 Create and initialize a new stmt_vec_info struct for STMT. */
6059
6060stmt_vec_info
b8698a0f 6061new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6062 bb_vec_info bb_vinfo)
ebfd146a
IR
6063{
6064 stmt_vec_info res;
6065 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6066
6067 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6068 STMT_VINFO_STMT (res) = stmt;
6069 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 6070 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 6071 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
6072 STMT_VINFO_LIVE_P (res) = false;
6073 STMT_VINFO_VECTYPE (res) = NULL;
6074 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 6075 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
6076 STMT_VINFO_IN_PATTERN_P (res) = false;
6077 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 6078 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
6079 STMT_VINFO_DATA_REF (res) = NULL;
6080
6081 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6082 STMT_VINFO_DR_OFFSET (res) = NULL;
6083 STMT_VINFO_DR_INIT (res) = NULL;
6084 STMT_VINFO_DR_STEP (res) = NULL;
6085 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6086
6087 if (gimple_code (stmt) == GIMPLE_PHI
6088 && is_loop_header_bb_p (gimple_bb (stmt)))
6089 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6090 else
8644a673
IR
6091 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6092
9771b263 6093 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 6094 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
6095 GROUP_FIRST_ELEMENT (res) = NULL;
6096 GROUP_NEXT_ELEMENT (res) = NULL;
6097 GROUP_SIZE (res) = 0;
6098 GROUP_STORE_COUNT (res) = 0;
6099 GROUP_GAP (res) = 0;
6100 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
6101
6102 return res;
6103}
6104
6105
6106/* Create a hash table for stmt_vec_info. */
6107
6108void
6109init_stmt_vec_info_vec (void)
6110{
9771b263
DN
6111 gcc_assert (!stmt_vec_info_vec.exists ());
6112 stmt_vec_info_vec.create (50);
ebfd146a
IR
6113}
6114
6115
6116/* Free hash table for stmt_vec_info. */
6117
6118void
6119free_stmt_vec_info_vec (void)
6120{
93675444
JJ
6121 unsigned int i;
6122 vec_void_p info;
6123 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6124 if (info != NULL)
6125 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
9771b263
DN
6126 gcc_assert (stmt_vec_info_vec.exists ());
6127 stmt_vec_info_vec.release ();
ebfd146a
IR
6128}
6129
6130
6131/* Free stmt vectorization related info. */
6132
6133void
6134free_stmt_vec_info (gimple stmt)
6135{
6136 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6137
6138 if (!stmt_info)
6139 return;
6140
78048b1c
JJ
6141 /* Check if this statement has a related "pattern stmt"
6142 (introduced by the vectorizer during the pattern recognition
6143 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6144 too. */
6145 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6146 {
6147 stmt_vec_info patt_info
6148 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6149 if (patt_info)
6150 {
363477c0
JJ
6151 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6152 if (seq)
6153 {
6154 gimple_stmt_iterator si;
6155 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6156 free_stmt_vec_info (gsi_stmt (si));
6157 }
78048b1c
JJ
6158 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6159 }
6160 }
6161
9771b263 6162 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
ebfd146a
IR
6163 set_vinfo_for_stmt (stmt, NULL);
6164 free (stmt_info);
6165}
6166
6167
bb67d9c7 6168/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6169
bb67d9c7 6170 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
6171 by the target. */
6172
bb67d9c7
RG
6173static tree
6174get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
6175{
6176 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6177 enum machine_mode simd_mode;
2f816591 6178 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
6179 int nunits;
6180 tree vectype;
6181
cc4b5170 6182 if (nbytes == 0)
ebfd146a
IR
6183 return NULL_TREE;
6184
48f2e373
RB
6185 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6186 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6187 return NULL_TREE;
6188
7b7b1813
RG
6189 /* For vector types of elements whose mode precision doesn't
6190 match their type's precision we use an element type of mode
6191 precision. The vectorization routines will have to make sure
48f2e373
RB
6192 they support the proper result truncation/extension.
6193 We also make sure to build vector types with INTEGER_TYPE
6194 component type only. */
6d7971b8 6195 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
6196 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6197 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
6198 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6199 TYPE_UNSIGNED (scalar_type));
6d7971b8 6200
ccbf5bb4
RG
6201 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6202 When the component mode passes the above test simply use a type
6203 corresponding to that mode. The theory is that any use that
6204 would cause problems with this will disable vectorization anyway. */
dfc2e2ac
RB
6205 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6206 && !INTEGRAL_TYPE_P (scalar_type)
6207 && !POINTER_TYPE_P (scalar_type))
60b95d28
RB
6208 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6209
6210 /* We can't build a vector type of elements with alignment bigger than
6211 their size. */
dfc2e2ac 6212 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
6213 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6214 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 6215
dfc2e2ac
RB
6216 /* If we fell back to using the mode, fail if there was
6217 no scalar type for it. */
6218 if (scalar_type == NULL_TREE)
6219 return NULL_TREE;
6220
bb67d9c7
RG
6221 /* If no size was supplied use the mode the target prefers. Otherwise
6222 lookup a vector mode of the specified size. */
6223 if (size == 0)
6224 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6225 else
6226 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
6227 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6228 if (nunits <= 1)
6229 return NULL_TREE;
ebfd146a
IR
6230
6231 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
6232
6233 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6234 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 6235 return NULL_TREE;
ebfd146a
IR
6236
6237 return vectype;
6238}
6239
bb67d9c7
RG
6240unsigned int current_vector_size;
6241
6242/* Function get_vectype_for_scalar_type.
6243
6244 Returns the vector type corresponding to SCALAR_TYPE as supported
6245 by the target. */
6246
6247tree
6248get_vectype_for_scalar_type (tree scalar_type)
6249{
6250 tree vectype;
6251 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6252 current_vector_size);
6253 if (vectype
6254 && current_vector_size == 0)
6255 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6256 return vectype;
6257}
6258
b690cc0f
RG
6259/* Function get_same_sized_vectype
6260
6261 Returns a vector type corresponding to SCALAR_TYPE of size
6262 VECTOR_TYPE if supported by the target. */
6263
6264tree
bb67d9c7 6265get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6266{
bb67d9c7
RG
6267 return get_vectype_for_scalar_type_and_size
6268 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
6269}
6270
ebfd146a
IR
6271/* Function vect_is_simple_use.
6272
6273 Input:
a70d6342
IR
6274 LOOP_VINFO - the vect info of the loop that is being vectorized.
6275 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6276 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
6277 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6278
6279 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6280 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6281 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6282 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
6283 is the case in reduction/induction computations).
6284 For basic blocks, supportable operands are constants and bb invariants.
6285 For now, operands defined outside the basic block are not supported. */
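/* Illustrative aside (assumed example) of the def kinds classified below,
   for a loop that is being vectorized; 'def_kinds_example' is a
   hypothetical function.  */
int k;                          /* defined outside the loop */

void
def_kinds_example (int *a, const int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* '1'        -> vect_constant_def
       'k'        -> vect_external_def (loop invariant)
       'b[i] * k' -> vect_internal_def (defined by the current iteration) */
    a[i] = b[i] * k + 1;
}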
ebfd146a
IR
6286
6287bool
24ee1384 6288vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6289 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6290 tree *def, enum vect_def_type *dt)
b8698a0f 6291{
ebfd146a
IR
6292 basic_block bb;
6293 stmt_vec_info stmt_vinfo;
a70d6342 6294 struct loop *loop = NULL;
b8698a0f 6295
a70d6342
IR
6296 if (loop_vinfo)
6297 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
6298
6299 *def_stmt = NULL;
6300 *def = NULL_TREE;
b8698a0f 6301
73fbfcad 6302 if (dump_enabled_p ())
ebfd146a 6303 {
78c60e3d
SS
6304 dump_printf_loc (MSG_NOTE, vect_location,
6305 "vect_is_simple_use: operand ");
6306 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 6307 }
b8698a0f 6308
b758f602 6309 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
6310 {
6311 *dt = vect_constant_def;
6312 return true;
6313 }
b8698a0f 6314
ebfd146a
IR
6315 if (is_gimple_min_invariant (operand))
6316 {
6317 *def = operand;
8644a673 6318 *dt = vect_external_def;
ebfd146a
IR
6319 return true;
6320 }
6321
6322 if (TREE_CODE (operand) == PAREN_EXPR)
6323 {
73fbfcad 6324 if (dump_enabled_p ())
78c60e3d 6325 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
ebfd146a
IR
6326 operand = TREE_OPERAND (operand, 0);
6327 }
b8698a0f 6328
ebfd146a
IR
6329 if (TREE_CODE (operand) != SSA_NAME)
6330 {
73fbfcad 6331 if (dump_enabled_p ())
78c60e3d
SS
6332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6333 "not ssa-name.");
ebfd146a
IR
6334 return false;
6335 }
b8698a0f 6336
ebfd146a
IR
6337 *def_stmt = SSA_NAME_DEF_STMT (operand);
6338 if (*def_stmt == NULL)
6339 {
73fbfcad 6340 if (dump_enabled_p ())
78c60e3d
SS
6341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6342 "no def_stmt.");
ebfd146a
IR
6343 return false;
6344 }
6345
73fbfcad 6346 if (dump_enabled_p ())
ebfd146a 6347 {
78c60e3d
SS
6348 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6349 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
6350 }
6351
8644a673 6352 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
6353 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6354 if (gimple_nop_p (*def_stmt))
6355 {
6356 *def = operand;
8644a673 6357 *dt = vect_external_def;
ebfd146a
IR
6358 return true;
6359 }
6360
6361 bb = gimple_bb (*def_stmt);
a70d6342
IR
6362
6363 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6364 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6365 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6366 *dt = vect_external_def;
ebfd146a
IR
6367 else
6368 {
6369 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6370 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6371 }
6372
24ee1384
IR
6373 if (*dt == vect_unknown_def_type
6374 || (stmt
6375 && *dt == vect_double_reduction_def
6376 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6377 {
73fbfcad 6378 if (dump_enabled_p ())
78c60e3d
SS
6379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6380 "Unsupported pattern.");
ebfd146a
IR
6381 return false;
6382 }
6383
73fbfcad 6384 if (dump_enabled_p ())
78c60e3d 6385 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
ebfd146a
IR
6386
6387 switch (gimple_code (*def_stmt))
6388 {
6389 case GIMPLE_PHI:
6390 *def = gimple_phi_result (*def_stmt);
6391 break;
6392
6393 case GIMPLE_ASSIGN:
6394 *def = gimple_assign_lhs (*def_stmt);
6395 break;
6396
6397 case GIMPLE_CALL:
6398 *def = gimple_call_lhs (*def_stmt);
6399 if (*def != NULL)
6400 break;
6401 /* FALLTHRU */
6402 default:
73fbfcad 6403 if (dump_enabled_p ())
78c60e3d
SS
6404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6405 "unsupported defining stmt: ");
ebfd146a
IR
6406 return false;
6407 }
6408
6409 return true;
6410}
6411
b690cc0f
RG
6412/* Function vect_is_simple_use_1.
6413
6414 Same as vect_is_simple_use but also determines the vector operand
6415 type of OPERAND and stores it to *VECTYPE. If the definition of
6416 OPERAND is vect_uninitialized_def, vect_constant_def or
6417 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6418 is responsible to compute the best suited vector type for the
6419 scalar operand. */
6420
6421bool
24ee1384 6422vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
6423 bb_vec_info bb_vinfo, gimple *def_stmt,
6424 tree *def, enum vect_def_type *dt, tree *vectype)
6425{
24ee1384
IR
6426 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6427 def, dt))
b690cc0f
RG
6428 return false;
6429
6430 /* Now get a vector type if the def is internal, otherwise supply
6431 NULL_TREE and leave it up to the caller to figure out a proper
6432 type for the use stmt. */
6433 if (*dt == vect_internal_def
6434 || *dt == vect_induction_def
6435 || *dt == vect_reduction_def
6436 || *dt == vect_double_reduction_def
6437 || *dt == vect_nested_cycle)
6438 {
6439 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
6440
6441 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6442 && !STMT_VINFO_RELEVANT (stmt_info)
6443 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 6444 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 6445
b690cc0f
RG
6446 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6447 gcc_assert (*vectype != NULL_TREE);
6448 }
6449 else if (*dt == vect_uninitialized_def
6450 || *dt == vect_constant_def
6451 || *dt == vect_external_def)
6452 *vectype = NULL_TREE;
6453 else
6454 gcc_unreachable ();
6455
6456 return true;
6457}
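/* Editor's note: an uncompiled sketch of the vect_is_simple_use_1 contract.
   When *VECTYPE comes back NULL_TREE (constant or external def), the caller
   must pick a vector type itself; deriving it from the scalar type of the
   operand, as below, is one plausible choice.  */
#if 0
  gimple def_stmt;
  tree def, vectype;
  enum vect_def_type dt;

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt, &vectype))
    return false;

  if (vectype == NULL_TREE)
    /* Constant or external def: no vectorized def stmt to take the
       vector type from, so compute one from the scalar type.  */
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
#endif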
6458
ebfd146a
IR
6459
6460/* Function supportable_widening_operation
6461
b8698a0f
L
6462 Check whether an operation represented by the code CODE is a
6463 widening operation that is supported by the target platform in
b690cc0f
RG
6464 vector form (i.e., when operating on arguments of type VECTYPE_IN
6465 producing a result of type VECTYPE_OUT).
b8698a0f 6466
ebfd146a
IR
6467   Widening operations we currently support are NOP (CONVERT), FLOAT,
6468   WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
6469   are supported by the target platform either directly (via vector
6470   tree-codes), or via target builtins.
6471
6472 Output:
b8698a0f
L
6473 - CODE1 and CODE2 are codes of vector operations to be used when
6474 vectorizing the operation, if available.
ebfd146a
IR
6475 - MULTI_STEP_CVT determines the number of required intermediate steps in
6476 case of multi-step conversion (like char->short->int - in that case
6477 MULTI_STEP_CVT will be 1).
b8698a0f
L
6478 - INTERM_TYPES contains the intermediate type required to perform the
6479 widening operation (short in the above example). */
ebfd146a
IR
6480
6481bool
b690cc0f
RG
6482supportable_widening_operation (enum tree_code code, gimple stmt,
6483 tree vectype_out, tree vectype_in,
ebfd146a
IR
6484 enum tree_code *code1, enum tree_code *code2,
6485 int *multi_step_cvt,
9771b263 6486 vec<tree> *interm_types)
ebfd146a
IR
6487{
6488 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6489 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 6490 struct loop *vect_loop = NULL;
ebfd146a 6491 enum machine_mode vec_mode;
81f40b79 6492 enum insn_code icode1, icode2;
ebfd146a 6493 optab optab1, optab2;
b690cc0f
RG
6494 tree vectype = vectype_in;
6495 tree wide_vectype = vectype_out;
ebfd146a 6496 enum tree_code c1, c2;
4a00c761
JJ
6497 int i;
6498 tree prev_type, intermediate_type;
6499 enum machine_mode intermediate_mode, prev_mode;
6500 optab optab3, optab4;
ebfd146a 6501
4a00c761 6502 *multi_step_cvt = 0;
4ef69dfc
IR
6503 if (loop_info)
6504 vect_loop = LOOP_VINFO_LOOP (loop_info);
6505
ebfd146a
IR
6506 switch (code)
6507 {
6508 case WIDEN_MULT_EXPR:
6ae6116f
RH
6509 /* The result of a vectorized widening operation usually requires
6510 two vectors (because the widened results do not fit into one vector).
6511      The generated vector results would normally be expected to appear
6512      in the same order as in the original scalar computation,
6513 i.e. if 8 results are generated in each vector iteration, they are
6514 to be organized as follows:
6515 vect1: [res1,res2,res3,res4],
6516 vect2: [res5,res6,res7,res8].
6517
6518 However, in the special case that the result of the widening
6519 operation is used in a reduction computation only, the order doesn't
6520 matter (because when vectorizing a reduction we change the order of
6521 the computation). Some targets can take advantage of this and
6522 generate more efficient code. For example, targets like Altivec,
6523 that support widen_mult using a sequence of {mult_even,mult_odd}
6524 generate the following vectors:
6525 vect1: [res1,res3,res5,res7],
6526 vect2: [res2,res4,res6,res8].
6527
6528 When vectorizing outer-loops, we execute the inner-loop sequentially
6529 (each vectorized inner-loop iteration contributes to VF outer-loop
6530      iterations in parallel).  We therefore don't allow changing the
6531 order of the computation in the inner-loop during outer-loop
6532 vectorization. */
6533 /* TODO: Another case in which order doesn't *really* matter is when we
6534 widen and then contract again, e.g. (short)((int)x * y >> 8).
6535 Normally, pack_trunc performs an even/odd permute, whereas the
6536 repack from an even/odd expansion would be an interleave, which
6537 would be significantly simpler for e.g. AVX2. */
6538 /* In any case, in order to avoid duplicating the code below, recurse
6539 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6540 are properly set up for the caller. If we fail, we'll continue with
6541 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6542 if (vect_loop
6543 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6544 && !nested_in_vect_loop_p (vect_loop, stmt)
6545 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6546 stmt, vectype_out, vectype_in,
a86ec597
RH
6547 code1, code2, multi_step_cvt,
6548 interm_types))
6ae6116f 6549 return true;
4a00c761
JJ
6550 c1 = VEC_WIDEN_MULT_LO_EXPR;
6551 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
6552 break;
6553
6ae6116f
RH
6554 case VEC_WIDEN_MULT_EVEN_EXPR:
6555 /* Support the recursion induced just above. */
6556 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6557 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6558 break;
6559
36ba4aae 6560 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
6561 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6562 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
6563 break;
6564
ebfd146a 6565 CASE_CONVERT:
4a00c761
JJ
6566 c1 = VEC_UNPACK_LO_EXPR;
6567 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
6568 break;
6569
6570 case FLOAT_EXPR:
4a00c761
JJ
6571 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6572 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
6573 break;
6574
6575 case FIX_TRUNC_EXPR:
6576 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6577 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6578 computing the operation. */
6579 return false;
6580
6581 default:
6582 gcc_unreachable ();
6583 }
6584
6ae6116f 6585 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
4a00c761
JJ
6586 {
6587 enum tree_code ctmp = c1;
6588 c1 = c2;
6589 c2 = ctmp;
6590 }
6591
ebfd146a
IR
6592 if (code == FIX_TRUNC_EXPR)
6593 {
6594       /* The signedness is determined from the output operand.  */
b690cc0f
RG
6595 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6596 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
6597 }
6598 else
6599 {
6600 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6601 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6602 }
6603
6604 if (!optab1 || !optab2)
6605 return false;
6606
6607 vec_mode = TYPE_MODE (vectype);
947131ba
RS
6608 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6609 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6610 return false;
6611
4a00c761
JJ
6612 *code1 = c1;
6613 *code2 = c2;
6614
6615 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6616 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6617 return true;
6618
b8698a0f 6619 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 6620 types. */
ebfd146a 6621
4a00c761
JJ
6622 prev_type = vectype;
6623 prev_mode = vec_mode;
b8698a0f 6624
4a00c761
JJ
6625 if (!CONVERT_EXPR_CODE_P (code))
6626 return false;
b8698a0f 6627
4a00c761
JJ
6628   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6629      intermediate steps in the promotion sequence.  We try
6630      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6631      not.  */
9771b263 6632 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
6633 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6634 {
6635 intermediate_mode = insn_data[icode1].operand[0].mode;
6636 intermediate_type
6637 = lang_hooks.types.type_for_mode (intermediate_mode,
6638 TYPE_UNSIGNED (prev_type));
6639 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6640 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6641
6642 if (!optab3 || !optab4
6643 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6644 || insn_data[icode1].operand[0].mode != intermediate_mode
6645 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6646 || insn_data[icode2].operand[0].mode != intermediate_mode
6647 || ((icode1 = optab_handler (optab3, intermediate_mode))
6648 == CODE_FOR_nothing)
6649 || ((icode2 = optab_handler (optab4, intermediate_mode))
6650 == CODE_FOR_nothing))
6651 break;
ebfd146a 6652
9771b263 6653 interm_types->quick_push (intermediate_type);
4a00c761
JJ
6654 (*multi_step_cvt)++;
6655
6656 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6657 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6658 return true;
6659
6660 prev_type = intermediate_type;
6661 prev_mode = intermediate_mode;
ebfd146a
IR
6662 }
6663
9771b263 6664 interm_types->release ();
4a00c761 6665 return false;
ebfd146a
IR
6666}
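/* Editor's note: an uncompiled worked example for the char->short->int case
   described above.  V16QI_TYPE and V4SI_TYPE are placeholder vector type
   nodes and STMT a placeholder analyzed stmt; on a 128-bit vector target
   one would expect CODE1/CODE2 = VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT = 1 and INTERM_TYPES holding the short vector type.  */
#if 0
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_widening_operation (NOP_EXPR, stmt,
				      v4si_type /* vectype_out */,
				      v16qi_type /* vectype_in */,
				      &code1, &code2, &multi_step_cvt,
				      &interm_types))
    {
      /* One intermediate type is pushed per extra step, so the counts
	 always match.  */
      gcc_assert (multi_step_cvt == (int) interm_types.length ());
      interm_types.release ();
    }
#endif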
6667
6668
6669/* Function supportable_narrowing_operation
6670
b8698a0f
L
6671 Check whether an operation represented by the code CODE is a
6672 narrowing operation that is supported by the target platform in
b690cc0f
RG
6673 vector form (i.e., when operating on arguments of type VECTYPE_IN
6674 and producing a result of type VECTYPE_OUT).
b8698a0f 6675
ebfd146a 6676 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 6677 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
6678 the target platform directly via vector tree-codes.
6679
6680 Output:
b8698a0f
L
6681 - CODE1 is the code of a vector operation to be used when
6682 vectorizing the operation, if available.
ebfd146a
IR
6683 - MULTI_STEP_CVT determines the number of required intermediate steps in
6684 case of multi-step conversion (like int->short->char - in that case
6685 MULTI_STEP_CVT will be 1).
6686 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 6687 narrowing operation (short in the above example). */
ebfd146a
IR
6688
6689bool
6690supportable_narrowing_operation (enum tree_code code,
b690cc0f 6691 tree vectype_out, tree vectype_in,
ebfd146a 6692 enum tree_code *code1, int *multi_step_cvt,
9771b263 6693 vec<tree> *interm_types)
ebfd146a
IR
6694{
6695 enum machine_mode vec_mode;
6696 enum insn_code icode1;
6697 optab optab1, interm_optab;
b690cc0f
RG
6698 tree vectype = vectype_in;
6699 tree narrow_vectype = vectype_out;
ebfd146a 6700 enum tree_code c1;
4a00c761
JJ
6701 tree intermediate_type;
6702 enum machine_mode intermediate_mode, prev_mode;
ebfd146a 6703 int i;
4a00c761 6704 bool uns;
ebfd146a 6705
4a00c761 6706 *multi_step_cvt = 0;
ebfd146a
IR
6707 switch (code)
6708 {
6709 CASE_CONVERT:
6710 c1 = VEC_PACK_TRUNC_EXPR;
6711 break;
6712
6713 case FIX_TRUNC_EXPR:
6714 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6715 break;
6716
6717 case FLOAT_EXPR:
6718 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6719 tree code and optabs used for computing the operation. */
6720 return false;
6721
6722 default:
6723 gcc_unreachable ();
6724 }
6725
6726 if (code == FIX_TRUNC_EXPR)
6727     /* The signedness is determined from the output operand.  */
b690cc0f 6728 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
6729 else
6730 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6731
6732 if (!optab1)
6733 return false;
6734
6735 vec_mode = TYPE_MODE (vectype);
947131ba 6736 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6737 return false;
6738
4a00c761
JJ
6739 *code1 = c1;
6740
6741 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6742 return true;
6743
ebfd146a
IR
6744 /* Check if it's a multi-step conversion that can be done using intermediate
6745 types. */
4a00c761
JJ
6746 prev_mode = vec_mode;
6747 if (code == FIX_TRUNC_EXPR)
6748 uns = TYPE_UNSIGNED (vectype_out);
6749 else
6750 uns = TYPE_UNSIGNED (vectype);
6751
6752   /* For a multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
6753      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
6754      more costly than signed.  */
6755 if (code == FIX_TRUNC_EXPR && uns)
6756 {
6757 enum insn_code icode2;
6758
6759 intermediate_type
6760 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6761 interm_optab
6762 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 6763 if (interm_optab != unknown_optab
4a00c761
JJ
6764	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6765 && insn_data[icode1].operand[0].mode
6766 == insn_data[icode2].operand[0].mode)
6767 {
6768 uns = false;
6769 optab1 = interm_optab;
6770 icode1 = icode2;
6771 }
6772 }
ebfd146a 6773
4a00c761
JJ
6774   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6775      intermediate steps in the demotion sequence.  We try
6776      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
9771b263 6777 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
6778 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6779 {
6780 intermediate_mode = insn_data[icode1].operand[0].mode;
6781 intermediate_type
6782 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6783 interm_optab
6784 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6785 optab_default);
6786 if (!interm_optab
6787 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6788 || insn_data[icode1].operand[0].mode != intermediate_mode
6789 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6790 == CODE_FOR_nothing))
6791 break;
6792
9771b263 6793 interm_types->quick_push (intermediate_type);
4a00c761
JJ
6794 (*multi_step_cvt)++;
6795
6796 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6797 return true;
6798
6799 prev_mode = intermediate_mode;
6800 optab1 = interm_optab;
ebfd146a
IR
6801 }
6802
9771b263 6803 interm_types->release ();
4a00c761 6804 return false;
ebfd146a 6805}
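/* Editor's note: the mirror-image uncompiled example for narrowing,
   int -> short -> char.  V16QI_TYPE and V4SI_TYPE are placeholders as
   above; one would expect CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1
   and the short vector type in INTERM_TYPES, with the pack applied
   pairwise at each level when the conversion is emitted.  */
#if 0
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (NOP_EXPR,
				       v16qi_type /* vectype_out */,
				       v4si_type /* vectype_in */,
				       &code1, &multi_step_cvt,
				       &interm_types))
    {
      /* Use code1 and interm_types to emit the packing sequence...  */
      interm_types.release ();
    }
#endif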