/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "cgraph.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
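
/* Illustrative example (not from the original source): when
   outer-loop vectorizing

     for (i = 0; i < n; i++)      <-- loop being vectorized
       for (j = 0; j < m; j++)    <-- loop->inner
         s += a[i][j];

   this predicate is true for the stmts of the j-loop body and false
   for the stmts of the i-loop outside it.  */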

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
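
/* Illustrative use (a sketch, not a call site from this file):
   charging one aligned vector store to the loop body while costs are
   still being collected in BODY_COSTS:

     unsigned estimate
       = record_stmt_cost (&body_costs, 1, vector_store, stmt_info,
			   0, vect_body);

   Passing a NULL cost vector instead hands the cost straight to the
   target's cost model through add_stmt_cost.  */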

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
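
/* Note (editorial): building the MEM_REF with the alias pointer type
   of the original reference keeps the new access in the same alias
   set as the scalar accesses it replaces, so no alias information is
   lost by the rewrite.  */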

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in which case the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is out of pattern use.  If LHS has other uses that are
	     pattern uses, we should mark the stmt itself, and not the pattern
	     stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
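
/* Illustrative example (editorial): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the store to a[i] has a vdef, so it is marked vect_used_in_scope.
   If the loop instead computed a sum that is read after the loop, the
   stmt defining the sum would have a use outside the loop and *LIVE_P
   would be set.  */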


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
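
/* For the internal calls above, argument 2 is the mask operand and,
   for IFN_MASK_STORE, argument 3 is the value being stored; both are
   real (non-indexing) uses.  Illustrative example (editorial): in

     c[i] = x_1;

   the use of i only indexes the array, so this predicate returns
   false for it, while the use of x_1 is a genuine value use and
   returns true.  */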


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	      || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
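
/* Worked example (editorial): a stmt with one constant operand that
   is vectorized with NCOPIES == 2 is charged one vector_stmt in the
   prologue (the constant's broadcast) plus two vector_stmt costs in
   the body, one per copy.  */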


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
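
/* Worked example (editorial, assuming vect_pow2 (x) returns 2**x): a
   two-step demotion (PWR == 1) is charged vect_pow2 (0) + vect_pow2 (1)
   = 1 + 2 = 3 vec_promote_demote insns, while a two-step promotion is
   charged vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6, since each
   promotion step doubles the number of result vectors.  */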

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
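
/* For a group of four interleaved accesses, for example, this returns
   4 for the group leader and 1 for the other three members, so the
   group-wide overhead is charged only once, at the leader.  */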


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
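
/* Worked example (editorial): storing a group of 4 interleaved
   vectors with NCOPIES == 1 and no store-lanes support is charged
   1 * log2 (4) * 4 = 8 vec_perm operations, in addition to the vector
   stores themselves costed by vect_get_store_cost.  */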


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
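
/* Worked example (editorial): a strided load of a four-element vector
   with NCOPIES == 2 is charged 2 * 4 = 8 scalar_load operations plus
   2 vec_construct operations to assemble the loaded elements into
   vectors.  */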


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
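
/* Illustrative use (a sketch; v4si_type names a hypothetical V4SI
   vector type tree):

     tree def = vect_init_vector (stmt,
				  build_int_cst (integer_type_node, 5),
				  v4si_type, NULL);

   This emits "vect_cst_ = { 5, 5, 5, 5 };" in the loop preheader and
   returns the new SSA name holding the broadcast constant.  */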


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def = ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, "  def_stmt = ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop.  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
1493
1494
1495/* Function vect_get_vec_def_for_stmt_copy
1496
ff802fa1 1497 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1498 vectorized stmt to be created (by the caller to this function) is a "copy"
1499 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1500 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1501 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1502 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1503 DT is the type of the vector def VEC_OPRND.
1504
1505 Context:
1506 In case the vectorization factor (VF) is bigger than the number
1507 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1508 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1509 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1510 smallest data-type determines the VF, and as a result, when vectorizing
1511 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1512 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1513 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1514 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1515 which VF=16 and nunits=4, so the number of copies required is 4):
1516
1517 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1518
ebfd146a
IR
1519 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1520 VS1.1: vx.1 = memref1 VS1.2
1521 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1522 VS1.3: vx.3 = memref3
ebfd146a
IR
1523
1524 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1525 VSnew.1: vz1 = vx.1 + ... VSnew.2
1526 VSnew.2: vz2 = vx.2 + ... VSnew.3
1527 VSnew.3: vz3 = vx.3 + ...
1528
1529 The vectorization of S1 is explained in vectorizable_load.
1530 The vectorization of S2:
b8698a0f
L
1531 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1532 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1533 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1534 returns the vector-def 'vx.0'.
1535
b8698a0f
L
1536 To create the remaining copies of the vector-stmt (VSnew.j), this
1537 function is called to get the relevant vector-def for each operand. It is
1538 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1539 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1540
b8698a0f
L
1541 For example, to obtain the vector-def 'vx.1' in order to create the
1542 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1543 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1544 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1545 and return its def ('vx.1').
1546 Overall, to create the above sequence this function will be called 3 times:
1547 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1548 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1549 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1550
1551tree
1552vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1553{
1554 gimple vec_stmt_for_operand;
1555 stmt_vec_info def_stmt_info;
1556
1557 /* Do nothing; can reuse same def. */
8644a673 1558 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1559 return vec_oprnd;
1560
1561 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1562 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1563 gcc_assert (def_stmt_info);
1564 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1565 gcc_assert (vec_stmt_for_operand);
1566 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1567 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1568 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1569 else
1570 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1571 return vec_oprnd;
1572}
1573
1574
1575/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1576 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1577
1578static void
b8698a0f 1579vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1580 vec<tree> *vec_oprnds0,
1581 vec<tree> *vec_oprnds1)
ebfd146a 1582{
9771b263 1583 tree vec_oprnd = vec_oprnds0->pop ();
1584
1585 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1586 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1587
9771b263 1588 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1589 {
9771b263 1590 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1591 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1592 vec_oprnds1->quick_push (vec_oprnd);
1593 }
1594}
1595
1596
1597/* Get vectorized definitions for OP0 and OP1.
1598 REDUC_INDEX is the index of reduction operand in case of reduction,
1599 and -1 otherwise. */
ebfd146a 1600
d092494c 1601void
ebfd146a 1602vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1603 vec<tree> *vec_oprnds0,
1604 vec<tree> *vec_oprnds1,
d092494c 1605 slp_tree slp_node, int reduc_index)
1606{
1607 if (slp_node)
1608 {
1609 int nops = (op1 == NULL_TREE) ? 1 : 2;
1610 auto_vec<tree> ops (nops);
1611 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1612
9771b263 1613 ops.quick_push (op0);
d092494c 1614 if (op1)
9771b263 1615 ops.quick_push (op1);
1616
1617 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1618
37b5ec8f 1619 *vec_oprnds0 = vec_defs[0];
d092494c 1620 if (op1)
37b5ec8f 1621 *vec_oprnds1 = vec_defs[1];
d092494c 1622 }
1623 else
1624 {
1625 tree vec_oprnd;
1626
9771b263 1627 vec_oprnds0->create (1);
b8698a0f 1628 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1629 vec_oprnds0->quick_push (vec_oprnd);
1630
1631 if (op1)
1632 {
9771b263 1633 vec_oprnds1->create (1);
b8698a0f 1634 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1635 vec_oprnds1->quick_push (vec_oprnd);
1636 }
1637 }
1638}
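
/* Usage sketch (hypothetical caller, not taken from this file): for a
   binary operation outside of SLP,

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        NULL, -1);

   leaves exactly one def in each vector; the defs for the remaining
   ncopies - 1 copies are then obtained by repeated calls to
   vect_get_vec_defs_for_stmt_copy on the same vectors.  */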
1639
1640
1641/* Function vect_finish_stmt_generation.
1642
1643 Insert a new stmt. */
1644
1645void
1646vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1647 gimple_stmt_iterator *gsi)
1648{
1649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1652
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1657 {
1658 gimple at_stmt = gsi_stmt (*gsi);
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 {
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 {
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 }
1679 }
1680 }
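 /* Illustration of the update above (a sketch; the SSA names are
 hypothetical). Inserting a vectorized store before a scalar store
 rewires the virtual operands as

 before: # .MEM_3 = VDEF <.MEM_1>
 scalar store

 after: # .MEM_4 = VDEF <.MEM_1>
 vector store <-- VEC_STMT
 # .MEM_3 = VDEF <.MEM_4>
 scalar store

 where .MEM_4 is the name created by copy_ssa_name above. */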
1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682
b8698a0f 1683 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1684 bb_vinfo));
ebfd146a 1685
73fbfcad 1686 if (dump_enabled_p ())
ebfd146a 1687 {
1688 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
e645e942 1690 dump_printf (MSG_NOTE, "\n");
1691 }
1692
ad885386 1693 gimple_set_location (vec_stmt, gimple_location (stmt));
1694}
1695
1696/* Checks if CALL can be vectorized in type VECTYPE. Returns
1697 a function declaration if the target has a vectorized version
1698 of the function, or NULL_TREE if the function cannot be vectorized. */
1699
1700tree
1701vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1702{
1703 tree fndecl = gimple_call_fndecl (call);
1704
1705 /* We only handle functions that do not read or clobber memory -- i.e.
1706 const or novops ones. */
1707 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1708 return NULL_TREE;
1709
1710 if (!fndecl
1711 || TREE_CODE (fndecl) != FUNCTION_DECL
1712 || !DECL_BUILT_IN (fndecl))
1713 return NULL_TREE;
1714
62f7fd21 1715 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1716 vectype_in);
1717}
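
/* For example (a sketch; the exact mapping is target- and flag-dependent):
   on x86 with AVX enabled, a call to the const builtin floor with
   vectype_out == vectype_in == V4DF may be mapped by the hook to an AVX
   rounding builtin, while a target without a suitable instruction simply
   returns NULL_TREE here.  */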
1718
1719
1720static tree permute_vec_elements (tree, tree, tree, gimple,
1721 gimple_stmt_iterator *);
1722
1723
1724/* Function vectorizable_mask_load_store.
1725
1726 Check if STMT performs a conditional load or store that can be vectorized.
1727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1728 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1730
1731static bool
1732vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1733 gimple *vec_stmt, slp_tree slp_node)
1734{
1735 tree vec_dest = NULL;
1736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1737 stmt_vec_info prev_stmt_info;
1738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1739 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1740 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1741 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1742 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1743 tree elem_type;
1744 gimple new_stmt;
1745 tree dummy;
1746 tree dataref_ptr = NULL_TREE;
1747 gimple ptr_incr;
1748 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1749 int ncopies;
1750 int i, j;
1751 bool inv_p;
1752 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1753 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1754 int gather_scale = 1;
1755 enum vect_def_type gather_dt = vect_unknown_def_type;
1756 bool is_store;
1757 tree mask;
1758 gimple def_stmt;
1759 tree def;
1760 enum vect_def_type dt;
1761
1762 if (slp_node != NULL)
1763 return false;
1764
1765 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1766 gcc_assert (ncopies >= 1);
1767
1768 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1769 mask = gimple_call_arg (stmt, 2);
1770 if (TYPE_PRECISION (TREE_TYPE (mask))
1771 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1772 return false;
1773
1774 /* FORNOW. This restriction should be relaxed. */
1775 if (nested_in_vect_loop && ncopies > 1)
1776 {
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "multiple types in nested loop.");
1780 return false;
1781 }
1782
1783 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1784 return false;
1785
1786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1787 return false;
1788
1789 if (!STMT_VINFO_DATA_REF (stmt_info))
1790 return false;
1791
1792 elem_type = TREE_TYPE (vectype);
1793
1794 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1795 return false;
1796
1797 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1798 return false;
1799
1800 if (STMT_VINFO_GATHER_P (stmt_info))
1801 {
1802 gimple def_stmt;
1803 tree def;
1804 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1805 &gather_off, &gather_scale);
1806 gcc_assert (gather_decl);
1807 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1808 &def_stmt, &def, &gather_dt,
1809 &gather_off_vectype))
1810 {
1811 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 "gather index use not simple.");
1814 return false;
1815 }
1816
1817 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1818 tree masktype
1819 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1820 if (TREE_CODE (masktype) == INTEGER_TYPE)
1821 {
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1824 "masked gather with integer mask not supported.");
1825 return false;
1826 }
1827 }
1828 else if (tree_int_cst_compare (nested_in_vect_loop
1829 ? STMT_VINFO_DR_STEP (stmt_info)
1830 : DR_STEP (dr), size_zero_node) <= 0)
1831 return false;
1832 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1833 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1834 return false;
1835
1836 if (TREE_CODE (mask) != SSA_NAME)
1837 return false;
1838
1839 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1840 &def_stmt, &def, &dt))
1841 return false;
1842
1843 if (is_store)
1844 {
1845 tree rhs = gimple_call_arg (stmt, 3);
1846 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1847 &def_stmt, &def, &dt))
1848 return false;
1849 }
1850
1851 if (!vec_stmt) /* transformation not required. */
1852 {
1853 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1854 if (is_store)
1855 vect_model_store_cost (stmt_info, ncopies, false, dt,
1856 NULL, NULL, NULL);
1857 else
1858 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1859 return true;
1860 }
1861
1862 /** Transform. **/
1863
1864 if (STMT_VINFO_GATHER_P (stmt_info))
1865 {
1866 tree vec_oprnd0 = NULL_TREE, op;
1867 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1868 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 1869 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 1870 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 1871 tree mask_perm_mask = NULL_TREE;
1872 edge pe = loop_preheader_edge (loop);
1873 gimple_seq seq;
1874 basic_block new_bb;
1875 enum { NARROW, NONE, WIDEN } modifier;
1876 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1877
1878 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1879 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1880 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1881 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1882 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1883 scaletype = TREE_VALUE (arglist);
1884 gcc_checking_assert (types_compatible_p (srctype, rettype)
1885 && types_compatible_p (srctype, masktype));
1886
1887 if (nunits == gather_off_nunits)
1888 modifier = NONE;
1889 else if (nunits == gather_off_nunits / 2)
1890 {
1891 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1892 modifier = WIDEN;
1893
1894 for (i = 0; i < gather_off_nunits; ++i)
1895 sel[i] = i | nunits;
1896
1897 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1898 gcc_assert (perm_mask != NULL_TREE);
1899 }
1900 else if (nunits == gather_off_nunits * 2)
1901 {
1902 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1903 modifier = NARROW;
1904
1905 for (i = 0; i < nunits; ++i)
1906 sel[i] = i < gather_off_nunits
1907 ? i : i + nunits - gather_off_nunits;
1908
1909 perm_mask = vect_gen_perm_mask (vectype, sel);
1910 gcc_assert (perm_mask != NULL_TREE);
1911 ncopies *= 2;
1912 for (i = 0; i < nunits; ++i)
1913 sel[i] = i | gather_off_nunits;
1914 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1915 gcc_assert (mask_perm_mask != NULL_TREE);
1916 }
1917 else
1918 gcc_unreachable ();
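 /* Modifier example (a sketch): with nunits == 4 and
 gather_off_nunits == 8 we get WIDEN, and sel[i] = i | 4 yields
 sel = { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the permutation that moves
 the high half of the offset vector into the low positions for the
 odd-numbered copies. In the NARROW case each vector result is
 instead assembled from two gathers (ncopies is doubled) and
 MASK_PERM_MASK selects the mask half the same way. */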
1919
1920 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1921
1922 ptr = fold_convert (ptrtype, gather_base);
1923 if (!is_gimple_min_invariant (ptr))
1924 {
1925 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1926 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1927 gcc_assert (!new_bb);
1928 }
1929
1930 scale = build_int_cst (scaletype, gather_scale);
1931
1932 prev_stmt_info = NULL;
1933 for (j = 0; j < ncopies; ++j)
1934 {
1935 if (modifier == WIDEN && (j & 1))
1936 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1937 perm_mask, stmt, gsi);
1938 else if (j == 0)
1939 op = vec_oprnd0
1940 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1941 else
1942 op = vec_oprnd0
1943 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1944
1945 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1946 {
1947 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1948 == TYPE_VECTOR_SUBPARTS (idxtype));
1949 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1950 var = make_ssa_name (var, NULL);
1951 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1952 new_stmt
1953 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1954 op, NULL_TREE);
1955 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1956 op = var;
1957 }
1958
1959 if (mask_perm_mask && (j & 1))
1960 mask_op = permute_vec_elements (mask_op, mask_op,
1961 mask_perm_mask, stmt, gsi);
1962 else
1963 {
1964 if (j == 0)
1965 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1966 else
1967 {
1968 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1969 &def_stmt, &def, &dt);
1970 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1971 }
5ce9450f 1972
1973 mask_op = vec_mask;
1974 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1975 {
1976 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1977 == TYPE_VECTOR_SUBPARTS (masktype));
1978 var = vect_get_new_vect_var (masktype, vect_simple_var,
1979 NULL);
1980 var = make_ssa_name (var, NULL);
1981 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1982 new_stmt
1983 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1984 mask_op, NULL_TREE);
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 mask_op = var;
1987 }
1988 }
1989
1990 new_stmt
1991 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1992 scale);
1993
1994 if (!useless_type_conversion_p (vectype, rettype))
1995 {
1996 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1997 == TYPE_VECTOR_SUBPARTS (rettype));
1998 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
1999 op = make_ssa_name (var, new_stmt);
2000 gimple_call_set_lhs (new_stmt, op);
2001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2002 var = make_ssa_name (vec_dest, NULL);
2003 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2004 new_stmt
2005 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2006 NULL_TREE);
2007 }
2008 else
2009 {
2010 var = make_ssa_name (vec_dest, new_stmt);
2011 gimple_call_set_lhs (new_stmt, var);
2012 }
2013
2014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015
2016 if (modifier == NARROW)
2017 {
2018 if ((j & 1) == 0)
2019 {
2020 prev_res = var;
2021 continue;
2022 }
2023 var = permute_vec_elements (prev_res, var,
2024 perm_mask, stmt, gsi);
2025 new_stmt = SSA_NAME_DEF_STMT (var);
2026 }
2027
2028 if (prev_stmt_info == NULL)
2029 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2030 else
2031 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2032 prev_stmt_info = vinfo_for_stmt (new_stmt);
2033 }
2034 return true;
2035 }
2036 else if (is_store)
2037 {
2038 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2039 prev_stmt_info = NULL;
2040 for (i = 0; i < ncopies; i++)
2041 {
2042 unsigned align, misalign;
2043
2044 if (i == 0)
2045 {
2046 tree rhs = gimple_call_arg (stmt, 3);
2047 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2048 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
 2049 /* We should have caught mismatched types earlier. */
2050 gcc_assert (useless_type_conversion_p (vectype,
2051 TREE_TYPE (vec_rhs)));
2052 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2053 NULL_TREE, &dummy, gsi,
2054 &ptr_incr, false, &inv_p);
2055 gcc_assert (!inv_p);
2056 }
2057 else
2058 {
2059 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2060 &def, &dt);
2061 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2062 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2063 &def, &dt);
2064 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2065 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2066 TYPE_SIZE_UNIT (vectype));
2067 }
2068
2069 align = TYPE_ALIGN_UNIT (vectype);
2070 if (aligned_access_p (dr))
2071 misalign = 0;
2072 else if (DR_MISALIGNMENT (dr) == -1)
2073 {
2074 align = TYPE_ALIGN_UNIT (elem_type);
2075 misalign = 0;
2076 }
2077 else
2078 misalign = DR_MISALIGNMENT (dr);
2079 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2080 misalign);
2081 new_stmt
2082 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2083 gimple_call_arg (stmt, 1),
2084 vec_mask, vec_rhs);
2085 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2086 if (i == 0)
2087 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2088 else
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2090 prev_stmt_info = vinfo_for_stmt (new_stmt);
2091 }
2092 }
2093 else
2094 {
2095 tree vec_mask = NULL_TREE;
2096 prev_stmt_info = NULL;
2097 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2098 for (i = 0; i < ncopies; i++)
2099 {
2100 unsigned align, misalign;
2101
2102 if (i == 0)
2103 {
2104 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2105 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2106 NULL_TREE, &dummy, gsi,
2107 &ptr_incr, false, &inv_p);
2108 gcc_assert (!inv_p);
2109 }
2110 else
2111 {
2112 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2113 &def, &dt);
2114 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2115 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2116 TYPE_SIZE_UNIT (vectype));
2117 }
2118
2119 align = TYPE_ALIGN_UNIT (vectype);
2120 if (aligned_access_p (dr))
2121 misalign = 0;
2122 else if (DR_MISALIGNMENT (dr) == -1)
2123 {
2124 align = TYPE_ALIGN_UNIT (elem_type);
2125 misalign = 0;
2126 }
2127 else
2128 misalign = DR_MISALIGNMENT (dr);
2129 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2130 misalign);
2131 new_stmt
2132 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2133 gimple_call_arg (stmt, 1),
2134 vec_mask);
2135 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2137 if (i == 0)
2138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2139 else
2140 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2141 prev_stmt_info = vinfo_for_stmt (new_stmt);
2142 }
2143 }
2144
2145 return true;
2146}
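
/* Sketch of the gimple emitted for a masked load with ncopies == 1 (SSA
   names hypothetical):

     vect_mask_5 = ...;                                      vector mask
     vect__6 = MASK_LOAD (vectp_7, <align arg>, vect_mask_5);

   i.e. an IFN_MASK_LOAD internal call whose second argument is carried
   over unchanged from the scalar call (gimple_call_arg (stmt, 1)).  */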
2147
2148
2149/* Function vectorizable_call.
2150
2151 Check if STMT performs a function call that can be vectorized.
2152 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 2153 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2154 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2155
2156static bool
2157vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2158 slp_tree slp_node)
2159{
2160 tree vec_dest;
2161 tree scalar_dest;
2162 tree op, type;
2163 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2165 tree vectype_out, vectype_in;
2166 int nunits_in;
2167 int nunits_out;
2168 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2169 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 2170 tree fndecl, new_temp, def, rhs_type;
ebfd146a 2171 gimple def_stmt;
2172 enum vect_def_type dt[3]
2173 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 2174 gimple new_stmt = NULL;
ebfd146a 2175 int ncopies, j;
6e1aa848 2176 vec<tree> vargs = vNULL;
2177 enum { NARROW, NONE, WIDEN } modifier;
2178 size_t i, nargs;
9d5e7640 2179 tree lhs;
ebfd146a 2180
190c2236 2181 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2182 return false;
2183
8644a673 2184 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2185 return false;
2186
2187 /* Is STMT a vectorizable call? */
2188 if (!is_gimple_call (stmt))
2189 return false;
2190
2191 if (gimple_call_internal_p (stmt)
2192 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2193 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2194 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2195 slp_node);
2196
2197 if (gimple_call_lhs (stmt) == NULL_TREE
2198 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2199 return false;
2200
0136f8f0 2201 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2202
2203 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2204
2205 /* Process function arguments. */
2206 rhs_type = NULL_TREE;
b690cc0f 2207 vectype_in = NULL_TREE;
2208 nargs = gimple_call_num_args (stmt);
2209
 2210 /* Bail out if the function has more than three arguments; we do not have
 2211 interesting builtin functions to vectorize with more than two arguments,
 2212 except for fma. Having no arguments is also not good. */
2213 if (nargs == 0 || nargs > 3)
2214 return false;
2215
2216 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2217 if (gimple_call_internal_p (stmt)
2218 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2219 {
2220 nargs = 0;
2221 rhs_type = unsigned_type_node;
2222 }
2223
2224 for (i = 0; i < nargs; i++)
2225 {
2226 tree opvectype;
2227
2228 op = gimple_call_arg (stmt, i);
2229
2230 /* We can only handle calls with arguments of the same type. */
2231 if (rhs_type
8533c9d8 2232 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2233 {
73fbfcad 2234 if (dump_enabled_p ())
78c60e3d 2235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2236 "argument types differ.\n");
2237 return false;
2238 }
2239 if (!rhs_type)
2240 rhs_type = TREE_TYPE (op);
ebfd146a 2241
24ee1384 2242 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 2243 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 2244 {
73fbfcad 2245 if (dump_enabled_p ())
78c60e3d 2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2247 "use not simple.\n");
2248 return false;
2249 }
ebfd146a 2250
2251 if (!vectype_in)
2252 vectype_in = opvectype;
2253 else if (opvectype
2254 && opvectype != vectype_in)
2255 {
73fbfcad 2256 if (dump_enabled_p ())
78c60e3d 2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2258 "argument vector types differ.\n");
2259 return false;
2260 }
2261 }
2262 /* If all arguments are external or constant defs use a vector type with
2263 the same size as the output vector type. */
ebfd146a 2264 if (!vectype_in)
b690cc0f 2265 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2266 if (vec_stmt)
2267 gcc_assert (vectype_in);
2268 if (!vectype_in)
2269 {
73fbfcad 2270 if (dump_enabled_p ())
7d8930a0 2271 {
2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2273 "no vectype for scalar type ");
2274 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2275 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2276 }
2277
2278 return false;
2279 }
2280
2281 /* FORNOW */
2282 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2283 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2284 if (nunits_in == nunits_out / 2)
2285 modifier = NARROW;
2286 else if (nunits_out == nunits_in)
2287 modifier = NONE;
2288 else if (nunits_out == nunits_in / 2)
2289 modifier = WIDEN;
2290 else
2291 return false;
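 /* For instance (a sketch): a conversion-like call with vectype_in ==
 V2DF (nunits_in == 2) and vectype_out == V4SI (nunits_out == 4)
 selects NARROW; each vectorized call then consumes two input vectors
 per output vector, as handled in the NARROW case below. */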
2292
2293 /* For now, we only vectorize functions if a target specific builtin
2294 is available. TODO -- in some cases, it might be profitable to
2295 insert the calls for pieces of the vector, in order to be able
2296 to vectorize other operations in the loop. */
2297 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2298 if (fndecl == NULL_TREE)
2299 {
2300 if (gimple_call_internal_p (stmt)
2301 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2302 && !slp_node
2303 && loop_vinfo
2304 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2305 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2306 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2307 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2308 {
2309 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2310 { 0, 1, 2, ... vf - 1 } vector. */
2311 gcc_assert (nargs == 0);
2312 }
2313 else
2314 {
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2317 "function is not vectorizable.\n");
2318 return false;
2319 }
2320 }
2321
5006671f 2322 gcc_assert (!gimple_vuse (stmt));
ebfd146a 2323
2324 if (slp_node || PURE_SLP_STMT (stmt_info))
2325 ncopies = 1;
2326 else if (modifier == NARROW)
2327 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2328 else
2329 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2330
2331 /* Sanity check: make sure that at least one copy of the vectorized stmt
2332 needs to be generated. */
2333 gcc_assert (ncopies >= 1);
2334
2335 if (!vec_stmt) /* transformation not required. */
2336 {
2337 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2340 "\n");
c3e7ee41 2341 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2342 return true;
2343 }
2344
2345 /** Transform. **/
2346
73fbfcad 2347 if (dump_enabled_p ())
e645e942 2348 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2349
2350 /* Handle def. */
2351 scalar_dest = gimple_call_lhs (stmt);
2352 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2353
2354 prev_stmt_info = NULL;
2355 switch (modifier)
2356 {
2357 case NONE:
2358 for (j = 0; j < ncopies; ++j)
2359 {
2360 /* Build argument list for the vectorized call. */
2361 if (j == 0)
9771b263 2362 vargs.create (nargs);
ebfd146a 2363 else
9771b263 2364 vargs.truncate (0);
ebfd146a 2365
2366 if (slp_node)
2367 {
ef062b13 2368 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2369 vec<tree> vec_oprnds0;
2370
2371 for (i = 0; i < nargs; i++)
9771b263 2372 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2373 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2374 vec_oprnds0 = vec_defs[0];
2375
2376 /* Arguments are ready. Create the new vector stmt. */
9771b263 2377 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2378 {
2379 size_t k;
2380 for (k = 0; k < nargs; k++)
2381 {
37b5ec8f 2382 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2383 vargs[k] = vec_oprndsk[i];
2384 }
2385 new_stmt = gimple_build_call_vec (fndecl, vargs);
2386 new_temp = make_ssa_name (vec_dest, new_stmt);
2387 gimple_call_set_lhs (new_stmt, new_temp);
2388 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2389 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2390 }
2391
2392 for (i = 0; i < nargs; i++)
2393 {
37b5ec8f 2394 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2395 vec_oprndsi.release ();
190c2236 2396 }
2397 continue;
2398 }
2399
2400 for (i = 0; i < nargs; i++)
2401 {
2402 op = gimple_call_arg (stmt, i);
2403 if (j == 0)
2404 vec_oprnd0
2405 = vect_get_vec_def_for_operand (op, stmt, NULL);
2406 else
2407 {
2408 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2409 vec_oprnd0
2410 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2411 }
ebfd146a 2412
9771b263 2413 vargs.quick_push (vec_oprnd0);
2414 }
2415
2416 if (gimple_call_internal_p (stmt)
2417 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2418 {
2419 tree *v = XALLOCAVEC (tree, nunits_out);
2420 int k;
2421 for (k = 0; k < nunits_out; ++k)
2422 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2423 tree cst = build_vector (vectype_out, v);
2424 tree new_var
2425 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2426 gimple init_stmt = gimple_build_assign (new_var, cst);
2427 new_temp = make_ssa_name (new_var, init_stmt);
2428 gimple_assign_set_lhs (init_stmt, new_temp);
2429 vect_init_vector_1 (stmt, init_stmt, NULL);
2430 new_temp = make_ssa_name (vec_dest, NULL);
2431 new_stmt = gimple_build_assign (new_temp,
2432 gimple_assign_lhs (init_stmt));
2433 }
2434 else
2435 {
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2439 }
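 /* E.g. (a sketch) with nunits_out == 4, copy j of the simd-lane
 call above is replaced by the constant vector
 { 4j, 4j+1, 4j+2, 4j+3 }, giving each lane its index within the
 vectorized iteration. */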
2440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2441
2442 if (j == 0)
2443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2444 else
2445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2446
2447 prev_stmt_info = vinfo_for_stmt (new_stmt);
2448 }
2449
2450 break;
2451
2452 case NARROW:
2453 for (j = 0; j < ncopies; ++j)
2454 {
2455 /* Build argument list for the vectorized call. */
2456 if (j == 0)
9771b263 2457 vargs.create (nargs * 2);
ebfd146a 2458 else
9771b263 2459 vargs.truncate (0);
ebfd146a 2460
2461 if (slp_node)
2462 {
ef062b13 2463 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2464 vec<tree> vec_oprnds0;
2465
2466 for (i = 0; i < nargs; i++)
9771b263 2467 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2468 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2469 vec_oprnds0 = vec_defs[0];
2470
2471 /* Arguments are ready. Create the new vector stmt. */
9771b263 2472 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2473 {
2474 size_t k;
9771b263 2475 vargs.truncate (0);
2476 for (k = 0; k < nargs; k++)
2477 {
37b5ec8f 2478 vec<tree> vec_oprndsk = vec_defs[k];
2479 vargs.quick_push (vec_oprndsk[i]);
2480 vargs.quick_push (vec_oprndsk[i + 1]);
2481 }
2482 new_stmt = gimple_build_call_vec (fndecl, vargs);
2483 new_temp = make_ssa_name (vec_dest, new_stmt);
2484 gimple_call_set_lhs (new_stmt, new_temp);
2485 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2486 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2487 }
2488
2489 for (i = 0; i < nargs; i++)
2490 {
37b5ec8f 2491 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2492 vec_oprndsi.release ();
190c2236 2493 }
2494 continue;
2495 }
2496
2497 for (i = 0; i < nargs; i++)
2498 {
2499 op = gimple_call_arg (stmt, i);
2500 if (j == 0)
2501 {
2502 vec_oprnd0
2503 = vect_get_vec_def_for_operand (op, stmt, NULL);
2504 vec_oprnd1
63827fb8 2505 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2506 }
2507 else
2508 {
336ecb65 2509 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2510 vec_oprnd0
63827fb8 2511 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2512 vec_oprnd1
63827fb8 2513 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2514 }
2515
2516 vargs.quick_push (vec_oprnd0);
2517 vargs.quick_push (vec_oprnd1);
2518 }
2519
2520 new_stmt = gimple_build_call_vec (fndecl, vargs);
2521 new_temp = make_ssa_name (vec_dest, new_stmt);
2522 gimple_call_set_lhs (new_stmt, new_temp);
2523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2524
2525 if (j == 0)
2526 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2527 else
2528 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2529
2530 prev_stmt_info = vinfo_for_stmt (new_stmt);
2531 }
2532
2533 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2534
2535 break;
2536
2537 case WIDEN:
2538 /* No current target implements this case. */
2539 return false;
2540 }
2541
9771b263 2542 vargs.release ();
ebfd146a 2543
2544 /* The call in STMT might prevent it from being removed in dce.
2545 We however cannot remove it here, due to the way the ssa name
2546 it defines is mapped to the new definition. So just replace
2547 rhs of the statement with something harmless. */
2548
2549 if (slp_node)
2550 return true;
2551
ebfd146a 2552 type = TREE_TYPE (scalar_dest);
2553 if (is_pattern_stmt_p (stmt_info))
2554 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2555 else
2556 lhs = gimple_call_lhs (stmt);
2557 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2558 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2559 set_vinfo_for_stmt (stmt, NULL);
2560 STMT_VINFO_STMT (stmt_info) = new_stmt;
2561 gsi_replace (gsi, new_stmt, false);
2562
2563 return true;
2564}
2565
2566
2567struct simd_call_arg_info
2568{
2569 tree vectype;
2570 tree op;
2571 enum vect_def_type dt;
2572 HOST_WIDE_INT linear_step;
2573 unsigned int align;
2574};
2575
2576/* Function vectorizable_simd_clone_call.
2577
2578 Check if STMT performs a function call that can be vectorized
2579 by calling a simd clone of the function.
2580 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 2581 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2582 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2583
2584static bool
2585vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2586 gimple *vec_stmt, slp_tree slp_node)
2587{
2588 tree vec_dest;
2589 tree scalar_dest;
2590 tree op, type;
2591 tree vec_oprnd0 = NULL_TREE;
2592 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2593 tree vectype;
2594 unsigned int nunits;
2595 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2596 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2597 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2598 tree fndecl, new_temp, def;
2599 gimple def_stmt;
2600 gimple new_stmt = NULL;
2601 int ncopies, j;
2602 vec<simd_call_arg_info> arginfo = vNULL;
2603 vec<tree> vargs = vNULL;
2604 size_t i, nargs;
2605 tree lhs, rtype, ratype;
2606 vec<constructor_elt, va_gc> *ret_ctor_elts;
2607
2608 /* Is STMT a vectorizable call? */
2609 if (!is_gimple_call (stmt))
2610 return false;
2611
2612 fndecl = gimple_call_fndecl (stmt);
2613 if (fndecl == NULL_TREE)
2614 return false;
2615
2616 struct cgraph_node *node = cgraph_get_node (fndecl);
2617 if (node == NULL || node->simd_clones == NULL)
2618 return false;
2619
2620 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2621 return false;
2622
2623 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2624 return false;
2625
2626 if (gimple_call_lhs (stmt)
2627 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2628 return false;
2629
2630 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2631
2632 vectype = STMT_VINFO_VECTYPE (stmt_info);
2633
2634 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2635 return false;
2636
2637 /* FORNOW */
2638 if (slp_node || PURE_SLP_STMT (stmt_info))
2639 return false;
2640
2641 /* Process function arguments. */
2642 nargs = gimple_call_num_args (stmt);
2643
2644 /* Bail out if the function has zero arguments. */
2645 if (nargs == 0)
2646 return false;
2647
2648 arginfo.create (nargs);
2649
2650 for (i = 0; i < nargs; i++)
2651 {
2652 simd_call_arg_info thisarginfo;
2653 affine_iv iv;
2654
2655 thisarginfo.linear_step = 0;
2656 thisarginfo.align = 0;
2657 thisarginfo.op = NULL_TREE;
2658
2659 op = gimple_call_arg (stmt, i);
2660 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2661 &def_stmt, &def, &thisarginfo.dt,
2662 &thisarginfo.vectype)
2663 || thisarginfo.dt == vect_uninitialized_def)
2664 {
2665 if (dump_enabled_p ())
2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2667 "use not simple.\n");
2668 arginfo.release ();
2669 return false;
2670 }
2671
2672 if (thisarginfo.dt == vect_constant_def
2673 || thisarginfo.dt == vect_external_def)
2674 gcc_assert (thisarginfo.vectype == NULL_TREE);
2675 else
2676 gcc_assert (thisarginfo.vectype != NULL_TREE);
2677
2678 if (thisarginfo.dt != vect_constant_def
2679 && thisarginfo.dt != vect_external_def
2680 && loop_vinfo
2681 && TREE_CODE (op) == SSA_NAME
2682 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2683 && tree_fits_shwi_p (iv.step))
2684 {
2685 thisarginfo.linear_step = tree_to_shwi (iv.step);
2686 thisarginfo.op = iv.base;
2687 }
2688 else if ((thisarginfo.dt == vect_constant_def
2689 || thisarginfo.dt == vect_external_def)
2690 && POINTER_TYPE_P (TREE_TYPE (op)))
2691 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2692
2693 arginfo.quick_push (thisarginfo);
2694 }
2695
2696 unsigned int badness = 0;
2697 struct cgraph_node *bestn = NULL;
2698 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2699 bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2700 else
2701 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2702 n = n->simdclone->next_clone)
2703 {
2704 unsigned int this_badness = 0;
2705 if (n->simdclone->simdlen
2706 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2707 || n->simdclone->nargs != nargs)
2708 continue;
2709 if (n->simdclone->simdlen
2710 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2711 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2712 - exact_log2 (n->simdclone->simdlen)) * 1024;
2713 if (n->simdclone->inbranch)
2714 this_badness += 2048;
2715 int target_badness = targetm.simd_clone.usable (n);
2716 if (target_badness < 0)
2717 continue;
2718 this_badness += target_badness * 512;
2719 /* FORNOW: Have to add code to add the mask argument. */
2720 if (n->simdclone->inbranch)
2721 continue;
2722 for (i = 0; i < nargs; i++)
2723 {
2724 switch (n->simdclone->args[i].arg_type)
2725 {
2726 case SIMD_CLONE_ARG_TYPE_VECTOR:
2727 if (!useless_type_conversion_p
2728 (n->simdclone->args[i].orig_type,
2729 TREE_TYPE (gimple_call_arg (stmt, i))))
2730 i = -1;
2731 else if (arginfo[i].dt == vect_constant_def
2732 || arginfo[i].dt == vect_external_def
2733 || arginfo[i].linear_step)
2734 this_badness += 64;
2735 break;
2736 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2737 if (arginfo[i].dt != vect_constant_def
2738 && arginfo[i].dt != vect_external_def)
2739 i = -1;
2740 break;
2741 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2742 if (arginfo[i].dt == vect_constant_def
2743 || arginfo[i].dt == vect_external_def
2744 || (arginfo[i].linear_step
2745 != n->simdclone->args[i].linear_step))
2746 i = -1;
2747 break;
2748 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2749 /* FORNOW */
2750 i = -1;
2751 break;
2752 case SIMD_CLONE_ARG_TYPE_MASK:
2753 gcc_unreachable ();
2754 }
2755 if (i == (size_t) -1)
2756 break;
2757 if (n->simdclone->args[i].alignment > arginfo[i].align)
2758 {
2759 i = -1;
2760 break;
2761 }
2762 if (arginfo[i].align)
2763 this_badness += (exact_log2 (arginfo[i].align)
2764 - exact_log2 (n->simdclone->args[i].alignment));
2765 }
2766 if (i == (size_t) -1)
2767 continue;
2768 if (bestn == NULL || this_badness < badness)
2769 {
2770 bestn = n;
2771 badness = this_badness;
2772 }
2773 }
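 /* Selection sketch: with a vectorization factor of 8 and clones of
 simdlen 4 and 8 available, the simdlen-8 clone wins, since the
 smaller clone pays (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024;
 an inbranch clone would pay a further 2048 on top of that. */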
2774
2775 if (bestn == NULL)
2776 {
2777 arginfo.release ();
2778 return false;
2779 }
2780
2781 for (i = 0; i < nargs; i++)
2782 if ((arginfo[i].dt == vect_constant_def
2783 || arginfo[i].dt == vect_external_def)
2784 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2785 {
2786 arginfo[i].vectype
2787 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2788 i)));
2789 if (arginfo[i].vectype == NULL
2790 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2791 > bestn->simdclone->simdlen))
2792 {
2793 arginfo.release ();
2794 return false;
2795 }
2796 }
2797
2798 fndecl = bestn->decl;
2799 nunits = bestn->simdclone->simdlen;
2800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2801
 2802 /* If the function isn't const, only allow it in simd loops where the user
2803 has asserted that at least nunits consecutive iterations can be
2804 performed using SIMD instructions. */
2805 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2806 && gimple_vuse (stmt))
2807 {
2808 arginfo.release ();
2809 return false;
2810 }
2811
2812 /* Sanity check: make sure that at least one copy of the vectorized stmt
2813 needs to be generated. */
2814 gcc_assert (ncopies >= 1);
2815
2816 if (!vec_stmt) /* transformation not required. */
2817 {
2818 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2819 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_NOTE, vect_location,
2822 "=== vectorizable_simd_clone_call ===\n");
2823/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2824 arginfo.release ();
2825 return true;
2826 }
2827
2828 /** Transform. **/
2829
2830 if (dump_enabled_p ())
2831 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2832
2833 /* Handle def. */
2834 scalar_dest = gimple_call_lhs (stmt);
2835 vec_dest = NULL_TREE;
2836 rtype = NULL_TREE;
2837 ratype = NULL_TREE;
2838 if (scalar_dest)
2839 {
2840 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2841 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2842 if (TREE_CODE (rtype) == ARRAY_TYPE)
2843 {
2844 ratype = rtype;
2845 rtype = TREE_TYPE (ratype);
2846 }
2847 }
2848
2849 prev_stmt_info = NULL;
2850 for (j = 0; j < ncopies; ++j)
2851 {
2852 /* Build argument list for the vectorized call. */
2853 if (j == 0)
2854 vargs.create (nargs);
2855 else
2856 vargs.truncate (0);
2857
2858 for (i = 0; i < nargs; i++)
2859 {
2860 unsigned int k, l, m, o;
2861 tree atype;
2862 op = gimple_call_arg (stmt, i);
2863 switch (bestn->simdclone->args[i].arg_type)
2864 {
2865 case SIMD_CLONE_ARG_TYPE_VECTOR:
2866 atype = bestn->simdclone->args[i].vector_type;
2867 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2868 for (m = j * o; m < (j + 1) * o; m++)
2869 {
2870 if (TYPE_VECTOR_SUBPARTS (atype)
2871 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2872 {
2873 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2874 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2875 / TYPE_VECTOR_SUBPARTS (atype));
2876 gcc_assert ((k & (k - 1)) == 0);
2877 if (m == 0)
2878 vec_oprnd0
2879 = vect_get_vec_def_for_operand (op, stmt, NULL);
2880 else
2881 {
2882 vec_oprnd0 = arginfo[i].op;
2883 if ((m & (k - 1)) == 0)
2884 vec_oprnd0
2885 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2886 vec_oprnd0);
2887 }
2888 arginfo[i].op = vec_oprnd0;
2889 vec_oprnd0
2890 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2891 size_int (prec),
2892 bitsize_int ((m & (k - 1)) * prec));
2893 new_stmt
2894 = gimple_build_assign (make_ssa_name (atype, NULL),
2895 vec_oprnd0);
2896 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2897 vargs.safe_push (gimple_assign_lhs (new_stmt));
2898 }
2899 else
2900 {
2901 k = (TYPE_VECTOR_SUBPARTS (atype)
2902 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2903 gcc_assert ((k & (k - 1)) == 0);
2904 vec<constructor_elt, va_gc> *ctor_elts;
2905 if (k != 1)
2906 vec_alloc (ctor_elts, k);
2907 else
2908 ctor_elts = NULL;
2909 for (l = 0; l < k; l++)
2910 {
2911 if (m == 0 && l == 0)
2912 vec_oprnd0
2913 = vect_get_vec_def_for_operand (op, stmt, NULL);
2914 else
2915 vec_oprnd0
2916 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2917 arginfo[i].op);
2918 arginfo[i].op = vec_oprnd0;
2919 if (k == 1)
2920 break;
2921 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2922 vec_oprnd0);
2923 }
2924 if (k == 1)
2925 vargs.safe_push (vec_oprnd0);
2926 else
2927 {
2928 vec_oprnd0 = build_constructor (atype, ctor_elts);
2929 new_stmt
2930 = gimple_build_assign (make_ssa_name (atype, NULL),
2931 vec_oprnd0);
2932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933 vargs.safe_push (gimple_assign_lhs (new_stmt));
2934 }
2935 }
2936 }
2937 break;
2938 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2939 vargs.safe_push (op);
2940 break;
2941 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2942 if (j == 0)
2943 {
2944 gimple_seq stmts;
2945 arginfo[i].op
2946 = force_gimple_operand (arginfo[i].op, &stmts, true,
2947 NULL_TREE);
2948 if (stmts != NULL)
2949 {
2950 basic_block new_bb;
2951 edge pe = loop_preheader_edge (loop);
2952 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2953 gcc_assert (!new_bb);
2954 }
2955 tree phi_res = copy_ssa_name (op, NULL);
2956 gimple new_phi = create_phi_node (phi_res, loop->header);
2957 set_vinfo_for_stmt (new_phi,
2958 new_stmt_vec_info (new_phi, loop_vinfo,
2959 NULL));
2960 add_phi_arg (new_phi, arginfo[i].op,
2961 loop_preheader_edge (loop), UNKNOWN_LOCATION);
2962 enum tree_code code
2963 = POINTER_TYPE_P (TREE_TYPE (op))
2964 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2965 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2966 ? sizetype : TREE_TYPE (op);
2967 double_int cst
2968 = double_int::from_shwi
2969 (bestn->simdclone->args[i].linear_step);
2970 cst *= double_int::from_uhwi (ncopies * nunits);
2971 tree tcst = double_int_to_tree (type, cst);
2972 tree phi_arg = copy_ssa_name (op, NULL);
2973 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
2974 phi_res, tcst);
2975 gimple_stmt_iterator si = gsi_after_labels (loop->header);
2976 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
2977 set_vinfo_for_stmt (new_stmt,
2978 new_stmt_vec_info (new_stmt, loop_vinfo,
2979 NULL));
2980 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
2981 UNKNOWN_LOCATION);
2982 arginfo[i].op = phi_res;
2983 vargs.safe_push (phi_res);
2984 }
2985 else
2986 {
2987 enum tree_code code
2988 = POINTER_TYPE_P (TREE_TYPE (op))
2989 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2990 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2991 ? sizetype : TREE_TYPE (op);
2992 double_int cst
2993 = double_int::from_shwi
2994 (bestn->simdclone->args[i].linear_step);
2995 cst *= double_int::from_uhwi (j * nunits);
2996 tree tcst = double_int_to_tree (type, cst);
2997 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
2998 new_stmt
2999 = gimple_build_assign_with_ops (code, new_temp,
3000 arginfo[i].op, tcst);
3001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3002 vargs.safe_push (new_temp);
3003 }
3004 break;
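 /* Sketch of the code generated for the linear case above, for a
 step S and initial value p (names hypothetical):

 header: p_0 = PHI <p (preheader), p_next (latch)>
 p_next = p_0 + S * ncopies * nunits;

 copy 0 passes p_0 and copy j > 0 passes p_0 + S * j * nunits, so
 each clone invocation sees the lane-0 value of its chunk. */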
3005 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3006 default:
3007 gcc_unreachable ();
3008 }
3009 }
3010
3011 new_stmt = gimple_build_call_vec (fndecl, vargs);
3012 if (vec_dest)
3013 {
3014 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3015 if (ratype)
3016 new_temp = create_tmp_var (ratype, NULL);
3017 else if (TYPE_VECTOR_SUBPARTS (vectype)
3018 == TYPE_VECTOR_SUBPARTS (rtype))
3019 new_temp = make_ssa_name (vec_dest, new_stmt);
3020 else
3021 new_temp = make_ssa_name (rtype, new_stmt);
3022 gimple_call_set_lhs (new_stmt, new_temp);
3023 }
3024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3025
3026 if (vec_dest)
3027 {
3028 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3029 {
3030 unsigned int k, l;
3031 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3032 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3033 gcc_assert ((k & (k - 1)) == 0);
3034 for (l = 0; l < k; l++)
3035 {
3036 tree t;
3037 if (ratype)
3038 {
3039 t = build_fold_addr_expr (new_temp);
3040 t = build2 (MEM_REF, vectype, t,
3041 build_int_cst (TREE_TYPE (t),
3042 l * prec / BITS_PER_UNIT));
3043 }
3044 else
3045 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3046 size_int (prec), bitsize_int (l * prec));
3047 new_stmt
3048 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3049 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3050 if (j == 0 && l == 0)
3051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3052 else
3053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3054
3055 prev_stmt_info = vinfo_for_stmt (new_stmt);
3056 }
3057
3058 if (ratype)
3059 {
3060 tree clobber = build_constructor (ratype, NULL);
3061 TREE_THIS_VOLATILE (clobber) = 1;
3062 new_stmt = gimple_build_assign (new_temp, clobber);
3063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3064 }
3065 continue;
3066 }
3067 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3068 {
3069 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3070 / TYPE_VECTOR_SUBPARTS (rtype));
3071 gcc_assert ((k & (k - 1)) == 0);
3072 if ((j & (k - 1)) == 0)
3073 vec_alloc (ret_ctor_elts, k);
3074 if (ratype)
3075 {
3076 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3077 for (m = 0; m < o; m++)
3078 {
3079 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3080 size_int (m), NULL_TREE, NULL_TREE);
3081 new_stmt
3082 = gimple_build_assign (make_ssa_name (rtype, NULL),
3083 tem);
3084 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3085 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3086 gimple_assign_lhs (new_stmt));
3087 }
3088 tree clobber = build_constructor (ratype, NULL);
3089 TREE_THIS_VOLATILE (clobber) = 1;
3090 new_stmt = gimple_build_assign (new_temp, clobber);
3091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3092 }
3093 else
3094 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3095 if ((j & (k - 1)) != k - 1)
3096 continue;
3097 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3098 new_stmt
3099 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3100 vec_oprnd0);
3101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3102
3103 if ((unsigned) j == k - 1)
3104 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3105 else
3106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3107
3108 prev_stmt_info = vinfo_for_stmt (new_stmt);
3109 continue;
3110 }
3111 else if (ratype)
3112 {
3113 tree t = build_fold_addr_expr (new_temp);
3114 t = build2 (MEM_REF, vectype, t,
3115 build_int_cst (TREE_TYPE (t), 0));
3116 new_stmt
3117 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3118 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3119 tree clobber = build_constructor (ratype, NULL);
3120 TREE_THIS_VOLATILE (clobber) = 1;
3121 vect_finish_stmt_generation (stmt,
3122 gimple_build_assign (new_temp,
3123 clobber), gsi);
3124 }
3125 }
3126
3127 if (j == 0)
3128 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3129 else
3130 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3131
3132 prev_stmt_info = vinfo_for_stmt (new_stmt);
3133 }
3134
3135 vargs.release ();
3136
3137 /* The call in STMT might prevent it from being removed in dce.
3138 We however cannot remove it here, due to the way the ssa name
3139 it defines is mapped to the new definition. So just replace
3140 rhs of the statement with something harmless. */
3141
3142 if (slp_node)
3143 return true;
3144
3145 if (scalar_dest)
3146 {
3147 type = TREE_TYPE (scalar_dest);
3148 if (is_pattern_stmt_p (stmt_info))
3149 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3150 else
3151 lhs = gimple_call_lhs (stmt);
3152 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3153 }
3154 else
3155 new_stmt = gimple_build_nop ();
3156 set_vinfo_for_stmt (new_stmt, stmt_info);
3157 set_vinfo_for_stmt (stmt, NULL);
3158 STMT_VINFO_STMT (stmt_info) = new_stmt;
3159 gsi_replace (gsi, new_stmt, false);
3160 unlink_stmt_vdef (stmt);
3161
3162 return true;
3163}
3164
3165
3166/* Function vect_gen_widened_results_half
3167
 3168 Create a vector stmt whose code, number of operands, and result
b8698a0f 3169 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
ff802fa1 3170 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3171 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3172 needs to be created (DECL is a function-decl of a target-builtin).
3173 STMT is the original scalar stmt that we are vectorizing. */
3174
3175static gimple
3176vect_gen_widened_results_half (enum tree_code code,
3177 tree decl,
3178 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3179 tree vec_dest, gimple_stmt_iterator *gsi,
3180 gimple stmt)
b8698a0f 3181{
ebfd146a 3182 gimple new_stmt;
3183 tree new_temp;
3184
3185 /* Generate half of the widened result: */
3186 if (code == CALL_EXPR)
3187 {
3188 /* Target specific support */
3189 if (op_type == binary_op)
3190 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3191 else
3192 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3193 new_temp = make_ssa_name (vec_dest, new_stmt);
3194 gimple_call_set_lhs (new_stmt, new_temp);
3195 }
3196 else
ebfd146a 3197 {
3198 /* Generic support */
3199 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3200 if (op_type != binary_op)
3201 vec_oprnd1 = NULL;
3202 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3203 vec_oprnd1);
3204 new_temp = make_ssa_name (vec_dest, new_stmt);
3205 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3206 }
3207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3208
3209 return new_stmt;
3210}
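
/* For example (a sketch): widening a V8HI multiplication uses code1/code2
   == VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR; each half produces
   one V4SI result from the same pair of V8HI operands, with one call to
   this function per half.  */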
3211
3212
3213/* Get vectorized definitions for loop-based vectorization. For the first
3214 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3215 scalar operand), and for the rest we get a copy with
3216 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3217 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3218 The vectors are collected into VEC_OPRNDS. */
3219
3220static void
3221vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
9771b263 3222 vec<tree> *vec_oprnds, int multi_step_cvt)
3223{
3224 tree vec_oprnd;
3225
3226 /* Get first vector operand. */
3227 /* All the vector operands except the very first one (that is scalar oprnd)
3228 are stmt copies. */
3229 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3230 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3231 else
3232 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3233
9771b263 3234 vec_oprnds->quick_push (vec_oprnd);
3235
3236 /* Get second vector operand. */
3237 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3238 vec_oprnds->quick_push (vec_oprnd);
3239
3240 *oprnd = vec_oprnd;
3241
3242 /* For conversion in multiple steps, continue to get operands
3243 recursively. */
3244 if (multi_step_cvt)
3245 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3246}
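
/* Sketch: with MULTI_STEP_CVT == 1 this collects four defs per scalar
   operand -- the first def plus three successive stmt copies -- in the
   order in which the demotion code below consumes them pairwise.  */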
3247
3248
3249/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3250 For multi-step conversions store the resulting vectors and call the function
3251 recursively. */
3252
3253static void
9771b263 3254vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4a00c761 3255 int multi_step_cvt, gimple stmt,
9771b263 3256 vec<tree> vec_dsts,
3257 gimple_stmt_iterator *gsi,
3258 slp_tree slp_node, enum tree_code code,
3259 stmt_vec_info *prev_stmt_info)
3260{
3261 unsigned int i;
3262 tree vop0, vop1, new_tmp, vec_dest;
3263 gimple new_stmt;
3264 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3265
9771b263 3266 vec_dest = vec_dsts.pop ();
4a00c761 3267
9771b263 3268 for (i = 0; i < vec_oprnds->length (); i += 2)
3269 {
3270 /* Create demotion operation. */
3271 vop0 = (*vec_oprnds)[i];
3272 vop1 = (*vec_oprnds)[i + 1];
3273 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3274 new_tmp = make_ssa_name (vec_dest, new_stmt);
3275 gimple_assign_set_lhs (new_stmt, new_tmp);
3276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3277
3278 if (multi_step_cvt)
3279 /* Store the resulting vector for next recursive call. */
9771b263 3280 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3281 else
3282 {
3283 /* This is the last step of the conversion sequence. Store the
3284 vectors in SLP_NODE or in vector info of the scalar statement
3285 (or in STMT_VINFO_RELATED_STMT chain). */
3286 if (slp_node)
9771b263 3287 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3288 else
3289 {
3290 if (!*prev_stmt_info)
3291 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3292 else
3293 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3294
3295 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3296 }
3297 }
3298 }
3299
3300 /* For multi-step demotion operations we first generate demotion operations
3301 from the source type to the intermediate types, and then combine the
3302 results (stored in VEC_OPRNDS) in demotion operation to the destination
3303 type. */
3304 if (multi_step_cvt)
3305 {
3306 /* At each level of recursion we have half of the operands we had at the
3307 previous level. */
9771b263 3308 vec_oprnds->truncate ((i+1)/2);
3309 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3310 stmt, vec_dsts, gsi, slp_node,
3311 VEC_PACK_TRUNC_EXPR,
3312 prev_stmt_info);
3313 }
3314
3315 vec_dsts.quick_push (vec_dest);
3316}
3317
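/* A worked example (illustrative only, assuming a target that provides
   V4SI, V8HI and V16QI vectors): demoting int to char with
   MULTI_STEP_CVT == 1 starts from four V4SI operands and halves the
   operand count at each level:

       level 1:  vt0 = VEC_PACK_TRUNC_EXPR <vop0, vop1>;     // V8HI
                 vt1 = VEC_PACK_TRUNC_EXPR <vop2, vop3>;     // V8HI
       level 0:  vres = VEC_PACK_TRUNC_EXPR <vt0, vt1>;      // V16QI

   The per-level destinations are popped from VEC_DSTS, intermediate
   type first and the final vector type last.  */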
3318
3319/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3320 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3321 the resulting vectors and call the function recursively. */
3322
3323static void
3324vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3325 vec<tree> *vec_oprnds1,
3326 gimple stmt, tree vec_dest,
3327 gimple_stmt_iterator *gsi,
3328 enum tree_code code1,
3329 enum tree_code code2, tree decl1,
3330 tree decl2, int op_type)
3331{
3332 int i;
3333 tree vop0, vop1, new_tmp1, new_tmp2;
3334 gimple new_stmt1, new_stmt2;
3335 vec<tree> vec_tmp = vNULL;
3336
3337 vec_tmp.create (vec_oprnds0->length () * 2);
3338 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3339 {
3340 if (op_type == binary_op)
3341 vop1 = (*vec_oprnds1)[i];
3342 else
3343 vop1 = NULL_TREE;
3344
3345 /* Generate the two halves of promotion operation. */
3346 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3347 op_type, vec_dest, gsi, stmt);
3348 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3349 op_type, vec_dest, gsi, stmt);
3350 if (is_gimple_call (new_stmt1))
3351 {
3352 new_tmp1 = gimple_call_lhs (new_stmt1);
3353 new_tmp2 = gimple_call_lhs (new_stmt2);
3354 }
3355 else
3356 {
3357 new_tmp1 = gimple_assign_lhs (new_stmt1);
3358 new_tmp2 = gimple_assign_lhs (new_stmt2);
3359 }
3360
3361 /* Store the results for the next step. */
3362 vec_tmp.quick_push (new_tmp1);
3363 vec_tmp.quick_push (new_tmp2);
3364 }
3365
3366 vec_oprnds0->release ();
3367 *vec_oprnds0 = vec_tmp;
3368}
3369
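/* A worked example (illustrative only, assuming a target with V8HI and
   V4SI vectors): widening short to int turns each V8HI operand into a
   lo/hi pair of V4SI results, so VEC_TMP ends up twice the length of
   VEC_OPRNDS0:

       vlo = VEC_UNPACK_LO_EXPR <vop0>;   // CODE1, V4SI
       vhi = VEC_UNPACK_HI_EXPR <vop0>;   // CODE2, V4SI

   DECL1/DECL2 are used instead of CODE1/CODE2 when the target
   implements the two halves as builtin calls.  */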
3370
3371/* Check if STMT performs a conversion operation that can be vectorized.
3372 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3373 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3374 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3375
3376static bool
3377vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3378 gimple *vec_stmt, slp_tree slp_node)
3379{
3380 tree vec_dest;
3381 tree scalar_dest;
3382 tree op0, op1 = NULL_TREE;
3383 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3384 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3385 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3386 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3387 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3388 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3389 tree new_temp;
3390 tree def;
3391 gimple def_stmt;
3392 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3393 gimple new_stmt = NULL;
3394 stmt_vec_info prev_stmt_info;
3395 int nunits_in;
3396 int nunits_out;
3397 tree vectype_out, vectype_in;
3398 int ncopies, i, j;
3399 tree lhs_type, rhs_type;
3400 enum { NARROW, NONE, WIDEN } modifier;
3401 vec<tree> vec_oprnds0 = vNULL;
3402 vec<tree> vec_oprnds1 = vNULL;
3403 tree vop0;
3404 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3405 int multi_step_cvt = 0;
3406 vec<tree> vec_dsts = vNULL;
3407 vec<tree> interm_types = vNULL;
3408 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3409 int op_type;
3410 enum machine_mode rhs_mode;
3411 unsigned short fltsz;
3412
3413 /* Is STMT a vectorizable conversion? */
3414
3415 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3416 return false;
3417
3418 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3419 return false;
3420
3421 if (!is_gimple_assign (stmt))
3422 return false;
3423
3424 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3425 return false;
3426
3427 code = gimple_assign_rhs_code (stmt);
3428 if (!CONVERT_EXPR_CODE_P (code)
3429 && code != FIX_TRUNC_EXPR
3430 && code != FLOAT_EXPR
3431 && code != WIDEN_MULT_EXPR
3432 && code != WIDEN_LSHIFT_EXPR)
3433 return false;
3434
3435 op_type = TREE_CODE_LENGTH (code);
3436
3437 /* Check types of lhs and rhs. */
3438 scalar_dest = gimple_assign_lhs (stmt);
3439 lhs_type = TREE_TYPE (scalar_dest);
3440 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3441
3442 op0 = gimple_assign_rhs1 (stmt);
3443 rhs_type = TREE_TYPE (op0);
3444
3445 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3446 && !((INTEGRAL_TYPE_P (lhs_type)
3447 && INTEGRAL_TYPE_P (rhs_type))
3448 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3449 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3450 return false;
3451
3452 if ((INTEGRAL_TYPE_P (lhs_type)
3453 && (TYPE_PRECISION (lhs_type)
3454 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3455 || (INTEGRAL_TYPE_P (rhs_type)
3456 && (TYPE_PRECISION (rhs_type)
3457 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3458 {
3459 if (dump_enabled_p ())
3460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3461 "type conversion to/from bit-precision unsupported."
3462 "\n");
3463 return false;
3464 }
3465
3466 /* Check the operands of the operation. */
3467 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3468 &def_stmt, &def, &dt[0], &vectype_in))
3469 {
3470 if (dump_enabled_p ())
3471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3472 "use not simple.\n");
3473 return false;
3474 }
3475 if (op_type == binary_op)
3476 {
3477 bool ok;
3478
3479 op1 = gimple_assign_rhs2 (stmt);
3480 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3481 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3482 OP1. */
3483 if (CONSTANT_CLASS_P (op0))
3484 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3485 &def_stmt, &def, &dt[1], &vectype_in);
3486 else
3487 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3488 &def, &dt[1]);
3489
3490 if (!ok)
3491 {
3492 if (dump_enabled_p ())
3493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3494 "use not simple.\n");
3495 return false;
3496 }
3497 }
3498
3499 /* If op0 is an external or constant def, use a vector type of
3500 the same size as the output vector type. */
3501 if (!vectype_in)
3502 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3503 if (vec_stmt)
3504 gcc_assert (vectype_in);
3505 if (!vectype_in)
3506 {
3507 if (dump_enabled_p ())
3508 {
3509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3510 "no vectype for scalar type ");
3511 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3512 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3513 }
3514
3515 return false;
3516 }
3517
3518 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3519 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3520 if (nunits_in < nunits_out)
3521 modifier = NARROW;
3522 else if (nunits_out == nunits_in)
3523 modifier = NONE;
3524 else
3525 modifier = WIDEN;
3526
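/* For example, assuming 128-bit vectors throughout: int -> short has
   nunits_in == 4 < nunits_out == 8, hence NARROW; int -> float keeps
   4 == 4 lanes, hence NONE; int -> double has 4 > 2, hence WIDEN.  */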
3527 /* Multiple types in SLP are handled by creating the appropriate number of
3528 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3529 case of SLP. */
3530 if (slp_node || PURE_SLP_STMT (stmt_info))
3531 ncopies = 1;
3532 else if (modifier == NARROW)
3533 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3534 else
3535 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 3536
ebfd146a
IR
3537 /* Sanity check: make sure that at least one copy of the vectorized stmt
3538 needs to be generated. */
3539 gcc_assert (ncopies >= 1);
3540
3541 /* Supportable by target? */
3542 switch (modifier)
3543 {
3544 case NONE:
3545 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3546 return false;
3547 if (supportable_convert_operation (code, vectype_out, vectype_in,
3548 &decl1, &code1))
3549 break;
3550 /* FALLTHRU */
3551 unsupported:
3552 if (dump_enabled_p ())
3553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3554 "conversion not supported by target.\n");
3555 return false;
3556
3557 case WIDEN:
3558 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3559 &code1, &code2, &multi_step_cvt,
3560 &interm_types))
3561 {
3562 /* Binary widening operation can only be supported directly by the
3563 architecture. */
3564 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3565 break;
3566 }
3567
3568 if (code != FLOAT_EXPR
3569 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3570 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3571 goto unsupported;
3572
3573 rhs_mode = TYPE_MODE (rhs_type);
3574 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3575 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3576 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3577 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3578 {
3579 cvt_type
3580 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3581 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3582 if (cvt_type == NULL_TREE)
3583 goto unsupported;
3584
3585 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3586 {
3587 if (!supportable_convert_operation (code, vectype_out,
3588 cvt_type, &decl1, &codecvt1))
3589 goto unsupported;
3590 }
3591 else if (!supportable_widening_operation (code, stmt, vectype_out,
3592 cvt_type, &codecvt1,
3593 &codecvt2, &multi_step_cvt,
3594 &interm_types))
3595 continue;
3596 else
3597 gcc_assert (multi_step_cvt == 0);
3598
3599 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3600 vectype_in, &code1, &code2,
3601 &multi_step_cvt, &interm_types))
3602 break;
3603 }
3604
3605 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3606 goto unsupported;
3607
3608 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3609 codecvt2 = ERROR_MARK;
3610 else
3611 {
3612 multi_step_cvt++;
3613 interm_types.safe_push (cvt_type);
3614 cvt_type = NULL_TREE;
3615 }
3616 break;
3617
3618 case NARROW:
3619 gcc_assert (op_type == unary_op);
3620 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3621 &code1, &multi_step_cvt,
3622 &interm_types))
3623 break;
3624
3625 if (code != FIX_TRUNC_EXPR
3626 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3627 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3628 goto unsupported;
3629
3630 rhs_mode = TYPE_MODE (rhs_type);
3631 cvt_type
3632 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3633 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3634 if (cvt_type == NULL_TREE)
3635 goto unsupported;
3636 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3637 &decl1, &codecvt1))
3638 goto unsupported;
3639 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3640 &code1, &multi_step_cvt,
3641 &interm_types))
3642 break;
3643 goto unsupported;
3644
3645 default:
3646 gcc_unreachable ();
3647 }
3648
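/* An illustrative sketch of the multi-step WIDEN case (assuming a
   target that cannot convert V8HI to V4SF directly): for FLOAT_EXPR
   from short to float the loop above settles on SImode for CVT_TYPE,
   so the transformation below will emit

       vi_lo = VEC_UNPACK_LO_EXPR <vop0>;   // code1, V4SI
       vi_hi = VEC_UNPACK_HI_EXPR <vop0>;   // code2, V4SI
       vf_lo = FLOAT_EXPR <vi_lo>;          // codecvt1, V4SF
       vf_hi = FLOAT_EXPR <vi_hi>;          // codecvt1, V4SF

   GET_MODE_SIZE (rhs_mode) == fltsz on the first iteration here, so
   CODECVT2 stays ERROR_MARK and no further intermediate level is
   needed.  */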
3649 if (!vec_stmt) /* transformation not required. */
3650 {
3651 if (dump_enabled_p ())
3652 dump_printf_loc (MSG_NOTE, vect_location,
3653 "=== vectorizable_conversion ===\n");
3654 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3655 {
3656 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3657 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3658 }
3659 else if (modifier == NARROW)
3660 {
3661 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3662 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3663 }
3664 else
3665 {
3666 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3667 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3668 }
3669 interm_types.release ();
3670 return true;
3671 }
3672
3673 /** Transform. **/
3674 if (dump_enabled_p ())
3675 dump_printf_loc (MSG_NOTE, vect_location,
3676 "transform conversion. ncopies = %d.\n", ncopies);
3677
3678 if (op_type == binary_op)
3679 {
3680 if (CONSTANT_CLASS_P (op0))
3681 op0 = fold_convert (TREE_TYPE (op1), op0);
3682 else if (CONSTANT_CLASS_P (op1))
3683 op1 = fold_convert (TREE_TYPE (op0), op1);
3684 }
3685
3686 /* In case of multi-step conversion, we first generate conversion operations
3687 to the intermediate types, and then from those types to the final one.
3688 We create vector destinations for the intermediate type (TYPES) received
3689 from supportable_*_operation, and store them in the correct order
3690 for future use in vect_create_vectorized_*_stmts (). */
3691 vec_dsts.create (multi_step_cvt + 1);
3692 vec_dest = vect_create_destination_var (scalar_dest,
3693 (cvt_type && modifier == WIDEN)
3694 ? cvt_type : vectype_out);
3695 vec_dsts.quick_push (vec_dest);
3696
3697 if (multi_step_cvt)
3698 {
3699 for (i = interm_types.length () - 1;
3700 interm_types.iterate (i, &intermediate_type); i--)
3701 {
3702 vec_dest = vect_create_destination_var (scalar_dest,
3703 intermediate_type);
3704 vec_dsts.quick_push (vec_dest);
3705 }
3706 }
3707
3708 if (cvt_type)
3709 vec_dest = vect_create_destination_var (scalar_dest,
3710 modifier == WIDEN
3711 ? vectype_out : cvt_type);
3712
3713 if (!slp_node)
3714 {
3715 if (modifier == WIDEN)
3716 {
3717 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3718 if (op_type == binary_op)
3719 vec_oprnds1.create (1);
3720 }
3721 else if (modifier == NARROW)
3722 vec_oprnds0.create (
3723 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3724 }
3725 else if (code == WIDEN_LSHIFT_EXPR)
3726 vec_oprnds1.create (slp_node->vec_stmts_size);
3727
3728 last_oprnd = op0;
3729 prev_stmt_info = NULL;
3730 switch (modifier)
3731 {
3732 case NONE:
3733 for (j = 0; j < ncopies; j++)
3734 {
3735 if (j == 0)
3736 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3737 -1);
3738 else
3739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3740
3741 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3742 {
3743 /* Arguments are ready, create the new vector stmt. */
3744 if (code1 == CALL_EXPR)
3745 {
3746 new_stmt = gimple_build_call (decl1, 1, vop0);
3747 new_temp = make_ssa_name (vec_dest, new_stmt);
3748 gimple_call_set_lhs (new_stmt, new_temp);
3749 }
3750 else
3751 {
3752 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3753 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3754 vop0, NULL);
3755 new_temp = make_ssa_name (vec_dest, new_stmt);
3756 gimple_assign_set_lhs (new_stmt, new_temp);
3757 }
3758
3759 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3760 if (slp_node)
3761 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3762 }
3763
3764 if (j == 0)
3765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3766 else
3767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3768 prev_stmt_info = vinfo_for_stmt (new_stmt);
3769 }
3770 break;
3771
3772 case WIDEN:
3773 /* In case the vectorization factor (VF) is bigger than the number
3774 of elements that we can fit in a vectype (nunits), we have to
3775 generate more than one vector stmt - i.e., we need to "unroll"
3776 the vector stmt by a factor VF/nunits. */
3777 for (j = 0; j < ncopies; j++)
3778 {
3779 /* Handle uses. */
3780 if (j == 0)
3781 {
3782 if (slp_node)
3783 {
3784 if (code == WIDEN_LSHIFT_EXPR)
3785 {
3786 unsigned int k;
3787
3788 vec_oprnd1 = op1;
3789 /* Store vec_oprnd1 for every vector stmt to be created
3790 for SLP_NODE. We check during the analysis that all
3791 the shift arguments are the same. */
3792 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3793 vec_oprnds1.quick_push (vec_oprnd1);
3794
3795 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3796 slp_node, -1);
3797 }
3798 else
3799 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3800 &vec_oprnds1, slp_node, -1);
3801 }
3802 else
3803 {
3804 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3805 vec_oprnds0.quick_push (vec_oprnd0);
3806 if (op_type == binary_op)
3807 {
3808 if (code == WIDEN_LSHIFT_EXPR)
3809 vec_oprnd1 = op1;
3810 else
3811 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3812 NULL);
3813 vec_oprnds1.quick_push (vec_oprnd1);
3814 }
3815 }
3816 }
3817 else
3818 {
3819 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3820 vec_oprnds0.truncate (0);
3821 vec_oprnds0.quick_push (vec_oprnd0);
3822 if (op_type == binary_op)
3823 {
3824 if (code == WIDEN_LSHIFT_EXPR)
3825 vec_oprnd1 = op1;
3826 else
3827 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3828 vec_oprnd1);
3829 vec_oprnds1.truncate (0);
3830 vec_oprnds1.quick_push (vec_oprnd1);
3831 }
3832 }
3833
3834 /* Arguments are ready. Create the new vector stmts. */
3835 for (i = multi_step_cvt; i >= 0; i--)
3836 {
3837 tree this_dest = vec_dsts[i];
3838 enum tree_code c1 = code1, c2 = code2;
3839 if (i == 0 && codecvt2 != ERROR_MARK)
3840 {
3841 c1 = codecvt1;
3842 c2 = codecvt2;
3843 }
3844 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3845 &vec_oprnds1,
3846 stmt, this_dest, gsi,
3847 c1, c2, decl1, decl2,
3848 op_type);
3849 }
3850
3851 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3852 {
3853 if (cvt_type)
3854 {
3855 if (codecvt1 == CALL_EXPR)
3856 {
3857 new_stmt = gimple_build_call (decl1, 1, vop0);
3858 new_temp = make_ssa_name (vec_dest, new_stmt);
3859 gimple_call_set_lhs (new_stmt, new_temp);
3860 }
3861 else
3862 {
3863 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3864 new_temp = make_ssa_name (vec_dest, NULL);
3865 new_stmt = gimple_build_assign_with_ops (codecvt1,
3866 new_temp,
3867 vop0, NULL);
3868 }
3869
3870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3871 }
3872 else
3873 new_stmt = SSA_NAME_DEF_STMT (vop0);
3874
3875 if (slp_node)
3876 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3877 else
3878 {
3879 if (!prev_stmt_info)
3880 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3881 else
3882 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3883 prev_stmt_info = vinfo_for_stmt (new_stmt);
3884 }
3885 }
3886 }
3887
3888 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3889 break;
3890
3891 case NARROW:
3892 /* In case the vectorization factor (VF) is bigger than the number
3893 of elements that we can fit in a vectype (nunits), we have to
3894 generate more than one vector stmt - i.e., we need to "unroll"
3895 the vector stmt by a factor VF/nunits. */
3896 for (j = 0; j < ncopies; j++)
3897 {
3898 /* Handle uses. */
3899 if (slp_node)
3900 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3901 slp_node, -1);
3902 else
3903 {
3904 vec_oprnds0.truncate (0);
3905 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3906 vect_pow2 (multi_step_cvt) - 1);
3907 }
3908
3909 /* Arguments are ready. Create the new vector stmts. */
3910 if (cvt_type)
3911 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3912 {
3913 if (codecvt1 == CALL_EXPR)
3914 {
3915 new_stmt = gimple_build_call (decl1, 1, vop0);
3916 new_temp = make_ssa_name (vec_dest, new_stmt);
3917 gimple_call_set_lhs (new_stmt, new_temp);
3918 }
3919 else
3920 {
3921 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3922 new_temp = make_ssa_name (vec_dest, NULL);
3923 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3924 vop0, NULL);
3925 }
3926
3927 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3928 vec_oprnds0[i] = new_temp;
3929 }
3930
3931 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3932 stmt, vec_dsts, gsi,
3933 slp_node, code1,
3934 &prev_stmt_info);
3935 }
3936
3937 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3938 break;
3939 }
3940
3941 vec_oprnds0.release ();
3942 vec_oprnds1.release ();
3943 vec_dsts.release ();
3944 interm_types.release ();
3945
3946 return true;
3947}
3948
3949
3950/* Function vectorizable_assignment.
3951
3952 Check if STMT performs an assignment (copy) that can be vectorized.
3953 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3954 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3955 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3956
3957static bool
3958vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3959 gimple *vec_stmt, slp_tree slp_node)
3960{
3961 tree vec_dest;
3962 tree scalar_dest;
3963 tree op;
3964 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3965 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3967 tree new_temp;
3968 tree def;
3969 gimple def_stmt;
3970 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3971 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3972 int ncopies;
3973 int i, j;
3974 vec<tree> vec_oprnds = vNULL;
3975 tree vop;
3976 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3977 gimple new_stmt = NULL;
3978 stmt_vec_info prev_stmt_info = NULL;
3979 enum tree_code code;
3980 tree vectype_in;
3981
3982 /* Multiple types in SLP are handled by creating the appropriate number of
3983 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3984 case of SLP. */
3985 if (slp_node || PURE_SLP_STMT (stmt_info))
3986 ncopies = 1;
3987 else
3988 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3989
3990 gcc_assert (ncopies >= 1);
3991
3992 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3993 return false;
3994
3995 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3996 return false;
3997
3998 /* Is vectorizable assignment? */
3999 if (!is_gimple_assign (stmt))
4000 return false;
4001
4002 scalar_dest = gimple_assign_lhs (stmt);
4003 if (TREE_CODE (scalar_dest) != SSA_NAME)
4004 return false;
4005
4006 code = gimple_assign_rhs_code (stmt);
4007 if (gimple_assign_single_p (stmt)
4008 || code == PAREN_EXPR
4009 || CONVERT_EXPR_CODE_P (code))
4010 op = gimple_assign_rhs1 (stmt);
4011 else
4012 return false;
4013
4014 if (code == VIEW_CONVERT_EXPR)
4015 op = TREE_OPERAND (op, 0);
4016
4017 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4018 &def_stmt, &def, &dt[0], &vectype_in))
4019 {
4020 if (dump_enabled_p ())
4021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4022 "use not simple.\n");
4023 return false;
4024 }
4025
4026 /* We can handle NOP_EXPR conversions that do not change the number
4027 of elements or the vector size. */
4028 if ((CONVERT_EXPR_CODE_P (code)
4029 || code == VIEW_CONVERT_EXPR)
4030 && (!vectype_in
4031 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4032 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4033 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4034 return false;
4035
4036 /* We do not handle bit-precision changes. */
4037 if ((CONVERT_EXPR_CODE_P (code)
4038 || code == VIEW_CONVERT_EXPR)
4039 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4040 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4041 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4042 || ((TYPE_PRECISION (TREE_TYPE (op))
4043 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4044 /* But a conversion that does not change the bit-pattern is ok. */
4045 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4046 > TYPE_PRECISION (TREE_TYPE (op)))
4047 && TYPE_UNSIGNED (TREE_TYPE (op))))
4048 {
4049 if (dump_enabled_p ())
4050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4051 "type conversion to/from bit-precision "
4052 "unsupported.\n");
4053 return false;
4054 }
4055
4056 if (!vec_stmt) /* transformation not required. */
4057 {
4058 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4059 if (dump_enabled_p ())
4060 dump_printf_loc (MSG_NOTE, vect_location,
4061 "=== vectorizable_assignment ===\n");
4062 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4063 return true;
4064 }
4065
4066 /** Transform. **/
4067 if (dump_enabled_p ())
4068 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4069
4070 /* Handle def. */
4071 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4072
4073 /* Handle use. */
4074 for (j = 0; j < ncopies; j++)
4075 {
4076 /* Handle uses. */
4077 if (j == 0)
4078 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4079 else
4080 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4081
4082 /* Arguments are ready. Create the new vector stmt. */
4083 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4084 {
4085 if (CONVERT_EXPR_CODE_P (code)
4086 || code == VIEW_CONVERT_EXPR)
4087 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4088 new_stmt = gimple_build_assign (vec_dest, vop);
4089 new_temp = make_ssa_name (vec_dest, new_stmt);
4090 gimple_assign_set_lhs (new_stmt, new_temp);
4091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4092 if (slp_node)
4093 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4094 }
4095
4096 if (slp_node)
4097 continue;
4098
4099 if (j == 0)
4100 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4101 else
4102 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4103
4104 prev_stmt_info = vinfo_for_stmt (new_stmt);
4105 }
4106
4107 vec_oprnds.release ();
4108 return true;
4109}
4110
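/* A small example (illustrative only): the lane- and size-preserving
   conversion

       unsigned int u = (unsigned int) s;    (with s of type int)

   is accepted above and vectorized as a plain copy through a
   view-convert,

       vect_u = VIEW_CONVERT_EXPR <vector(4) unsigned int> (vect_s);

   since the bit pattern of every lane is unchanged.  */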
4111
4112/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4113 either as shift by a scalar or by a vector. */
4114
4115bool
4116vect_supportable_shift (enum tree_code code, tree scalar_type)
4117{
4118
4119 enum machine_mode vec_mode;
4120 optab optab;
4121 int icode;
4122 tree vectype;
4123
4124 vectype = get_vectype_for_scalar_type (scalar_type);
4125 if (!vectype)
4126 return false;
4127
4128 optab = optab_for_tree_code (code, vectype, optab_scalar);
4129 if (!optab
4130 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4131 {
4132 optab = optab_for_tree_code (code, vectype, optab_vector);
4133 if (!optab
4134 || (optab_handler (optab, TYPE_MODE (vectype))
4135 == CODE_FOR_nothing))
4136 return false;
4137 }
4138
4139 vec_mode = TYPE_MODE (vectype);
4140 icode = (int) optab_handler (optab, vec_mode);
4141 if (icode == CODE_FOR_nothing)
4142 return false;
4143
4144 return true;
4145}
4146
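/* A minimal usage sketch (hypothetical caller code): a pattern
   recognizer can ask up front whether a shift it is about to
   synthesize is vectorizable at all, e.g.

       if (!vect_supportable_shift (RSHIFT_EXPR, itype))
         return NULL;

   This succeeds if either the vector/scalar or the vector/vector
   optab has a handler for the corresponding vector mode.  */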
4147
4148/* Function vectorizable_shift.
4149
4150 Check if STMT performs a shift operation that can be vectorized.
4151 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4152 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4153 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4154
4155static bool
4156vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4157 gimple *vec_stmt, slp_tree slp_node)
4158{
4159 tree vec_dest;
4160 tree scalar_dest;
4161 tree op0, op1 = NULL;
4162 tree vec_oprnd1 = NULL_TREE;
4163 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4164 tree vectype;
4165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4166 enum tree_code code;
4167 enum machine_mode vec_mode;
4168 tree new_temp;
4169 optab optab;
4170 int icode;
4171 enum machine_mode optab_op2_mode;
4172 tree def;
4173 gimple def_stmt;
4174 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4175 gimple new_stmt = NULL;
4176 stmt_vec_info prev_stmt_info;
4177 int nunits_in;
4178 int nunits_out;
4179 tree vectype_out;
4180 tree op1_vectype;
4181 int ncopies;
4182 int j, i;
4183 vec<tree> vec_oprnds0 = vNULL;
4184 vec<tree> vec_oprnds1 = vNULL;
4185 tree vop0, vop1;
4186 unsigned int k;
4187 bool scalar_shift_arg = true;
4188 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4189 int vf;
4190
4191 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4192 return false;
4193
4194 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4195 return false;
4196
4197 /* Is STMT a vectorizable binary/unary operation? */
4198 if (!is_gimple_assign (stmt))
4199 return false;
4200
4201 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4202 return false;
4203
4204 code = gimple_assign_rhs_code (stmt);
4205
4206 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4207 || code == RROTATE_EXPR))
4208 return false;
4209
4210 scalar_dest = gimple_assign_lhs (stmt);
4211 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4212 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4213 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4214 {
4215 if (dump_enabled_p ())
4216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4217 "bit-precision shifts not supported.\n");
4218 return false;
4219 }
4220
4221 op0 = gimple_assign_rhs1 (stmt);
4222 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4223 &def_stmt, &def, &dt[0], &vectype))
4224 {
4225 if (dump_enabled_p ())
4226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4227 "use not simple.\n");
4228 return false;
4229 }
4230 /* If op0 is an external or constant def, use a vector type with
4231 the same size as the output vector type. */
4232 if (!vectype)
4233 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4234 if (vec_stmt)
4235 gcc_assert (vectype);
4236 if (!vectype)
4237 {
4238 if (dump_enabled_p ())
4239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4240 "no vectype for scalar type\n");
4241 return false;
4242 }
4243
4244 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4245 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4246 if (nunits_out != nunits_in)
4247 return false;
4248
4249 op1 = gimple_assign_rhs2 (stmt);
4250 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4251 &def, &dt[1], &op1_vectype))
4252 {
4253 if (dump_enabled_p ())
4254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4255 "use not simple.\n");
4256 return false;
4257 }
4258
4259 if (loop_vinfo)
4260 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4261 else
4262 vf = 1;
4263
4264 /* Multiple types in SLP are handled by creating the appropriate number of
4265 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4266 case of SLP. */
4267 if (slp_node || PURE_SLP_STMT (stmt_info))
4268 ncopies = 1;
4269 else
4270 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4271
4272 gcc_assert (ncopies >= 1);
4273
4274 /* Determine whether the shift amount is a vector, or scalar. If the
4275 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4276
4277 if (dt[1] == vect_internal_def && !slp_node)
4278 scalar_shift_arg = false;
4279 else if (dt[1] == vect_constant_def
4280 || dt[1] == vect_external_def
4281 || dt[1] == vect_internal_def)
4282 {
4283 /* In SLP, we need to check whether the shift count is the same;
4284 in loops, if it is a constant or invariant, it is always
4285 a scalar shift. */
4286 if (slp_node)
4287 {
4288 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4289 gimple slpstmt;
4290
4291 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4292 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4293 scalar_shift_arg = false;
4294 }
4295 }
4296 else
4297 {
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "operand mode requires invariant argument.\n");
4301 return false;
4302 }
4303
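/* For example: in  a[i] = b[i] << c[i]  the shift amount is defined
   inside the loop (vect_internal_def), so the vector/vector form must
   be used; in  a[i] = b[i] << n  the invariant N can stay scalar and
   the vector/scalar optab is tried first below.  */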
4304 /* Vector shifted by vector. */
4305 if (!scalar_shift_arg)
4306 {
4307 optab = optab_for_tree_code (code, vectype, optab_vector);
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE, vect_location,
4310 "vector/vector shift/rotate found.\n");
4311
4312 if (!op1_vectype)
4313 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4314 if (op1_vectype == NULL_TREE
4315 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4316 {
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4319 "unusable type for last operand in"
4320 " vector/vector shift/rotate.\n");
4321 return false;
4322 }
4323 }
4324 /* See if the machine has a vector shifted by scalar insn and if not
4325 then see if it has a vector shifted by vector insn. */
4326 else
4327 {
4328 optab = optab_for_tree_code (code, vectype, optab_scalar);
4329 if (optab
4330 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4331 {
4332 if (dump_enabled_p ())
4333 dump_printf_loc (MSG_NOTE, vect_location,
4334 "vector/scalar shift/rotate found.\n");
4335 }
4336 else
4337 {
4338 optab = optab_for_tree_code (code, vectype, optab_vector);
4339 if (optab
4340 && (optab_handler (optab, TYPE_MODE (vectype))
4341 != CODE_FOR_nothing))
4342 {
4343 scalar_shift_arg = false;
4344
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_NOTE, vect_location,
4347 "vector/vector shift/rotate found.\n");
4348
4349 /* Unlike the other binary operators, shifts/rotates have
4350 the rhs being int, instead of the same type as the lhs,
4351 so make sure the scalar is the right type if we are
4352 dealing with vectors of long long/long/short/char. */
4353 if (dt[1] == vect_constant_def)
4354 op1 = fold_convert (TREE_TYPE (vectype), op1);
4355 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4356 TREE_TYPE (op1)))
4357 {
4358 if (slp_node
4359 && TYPE_MODE (TREE_TYPE (vectype))
4360 != TYPE_MODE (TREE_TYPE (op1)))
4361 {
4362 if (dump_enabled_p ())
4363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4364 "unusable type for last operand in"
4365 " vector/vector shift/rotate.\n");
4366 return false;
4367 }
4368 if (vec_stmt && !slp_node)
4369 {
4370 op1 = fold_convert (TREE_TYPE (vectype), op1);
4371 op1 = vect_init_vector (stmt, op1,
4372 TREE_TYPE (vectype), NULL);
4373 }
4374 }
4375 }
4376 }
4377 }
4378
4379 /* Supportable by target? */
4380 if (!optab)
4381 {
4382 if (dump_enabled_p ())
4383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4384 "no optab.\n");
4385 return false;
4386 }
4387 vec_mode = TYPE_MODE (vectype);
4388 icode = (int) optab_handler (optab, vec_mode);
4389 if (icode == CODE_FOR_nothing)
4390 {
4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4393 "op not supported by target.\n");
4394 /* Check only during analysis. */
4395 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4396 || (vf < vect_min_worthwhile_factor (code)
4397 && !vec_stmt))
4398 return false;
4399 if (dump_enabled_p ())
4400 dump_printf_loc (MSG_NOTE, vect_location,
4401 "proceeding using word mode.\n");
4402 }
4403
4404 /* Worthwhile without SIMD support? Check only during analysis. */
4405 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4406 && vf < vect_min_worthwhile_factor (code)
4407 && !vec_stmt)
4408 {
4409 if (dump_enabled_p ())
4410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4411 "not worthwhile without SIMD support.\n");
4412 return false;
4413 }
4414
4415 if (!vec_stmt) /* transformation not required. */
4416 {
4417 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4418 if (dump_enabled_p ())
4419 dump_printf_loc (MSG_NOTE, vect_location,
4420 "=== vectorizable_shift ===\n");
4421 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4422 return true;
4423 }
4424
4425 /** Transform. **/
4426
4427 if (dump_enabled_p ())
4428 dump_printf_loc (MSG_NOTE, vect_location,
4429 "transform binary/unary operation.\n");
4430
4431 /* Handle def. */
4432 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4433
4434 prev_stmt_info = NULL;
4435 for (j = 0; j < ncopies; j++)
4436 {
4437 /* Handle uses. */
4438 if (j == 0)
4439 {
4440 if (scalar_shift_arg)
4441 {
4442 /* Vector shl and shr insn patterns can be defined with scalar
4443 operand 2 (shift operand). In this case, use constant or loop
4444 invariant op1 directly, without extending it to vector mode
4445 first. */
4446 optab_op2_mode = insn_data[icode].operand[2].mode;
4447 if (!VECTOR_MODE_P (optab_op2_mode))
4448 {
4449 if (dump_enabled_p ())
4450 dump_printf_loc (MSG_NOTE, vect_location,
4451 "operand 1 using scalar mode.\n");
4452 vec_oprnd1 = op1;
4453 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4454 vec_oprnds1.quick_push (vec_oprnd1);
4455 if (slp_node)
4456 {
4457 /* Store vec_oprnd1 for every vector stmt to be created
4458 for SLP_NODE. We check during the analysis that all
4459 the shift arguments are the same.
4460 TODO: Allow different constants for different vector
4461 stmts generated for an SLP instance. */
4462 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4463 vec_oprnds1.quick_push (vec_oprnd1);
4464 }
4465 }
4466 }
4467
4468 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4469 (a special case for certain kinds of vector shifts); otherwise,
4470 operand 1 should be of a vector type (the usual case). */
4471 if (vec_oprnd1)
4472 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4473 slp_node, -1);
4474 else
4475 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4476 slp_node, -1);
4477 }
4478 else
4479 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4480
4481 /* Arguments are ready. Create the new vector stmt. */
4482 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4483 {
4484 vop1 = vec_oprnds1[i];
4485 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4486 new_temp = make_ssa_name (vec_dest, new_stmt);
4487 gimple_assign_set_lhs (new_stmt, new_temp);
4488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4489 if (slp_node)
4490 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4491 }
4492
4493 if (slp_node)
4494 continue;
4495
4496 if (j == 0)
4497 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4498 else
4499 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4500 prev_stmt_info = vinfo_for_stmt (new_stmt);
4501 }
4502
4503 vec_oprnds0.release ();
4504 vec_oprnds1.release ();
4505
4506 return true;
4507}
4508
4509
4510/* Function vectorizable_operation.
4511
4512 Check if STMT performs a binary, unary or ternary operation that can
4513 be vectorized.
4514 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4515 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4516 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4517
4518static bool
4519vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4520 gimple *vec_stmt, slp_tree slp_node)
4521{
4522 tree vec_dest;
4523 tree scalar_dest;
4524 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4526 tree vectype;
4527 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4528 enum tree_code code;
4529 enum machine_mode vec_mode;
4530 tree new_temp;
4531 int op_type;
4532 optab optab;
4533 int icode;
4534 tree def;
4535 gimple def_stmt;
4536 enum vect_def_type dt[3]
4537 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4538 gimple new_stmt = NULL;
4539 stmt_vec_info prev_stmt_info;
4540 int nunits_in;
4541 int nunits_out;
4542 tree vectype_out;
4543 int ncopies;
4544 int j, i;
4545 vec<tree> vec_oprnds0 = vNULL;
4546 vec<tree> vec_oprnds1 = vNULL;
4547 vec<tree> vec_oprnds2 = vNULL;
4548 tree vop0, vop1, vop2;
4549 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4550 int vf;
4551
4552 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4553 return false;
4554
4555 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4556 return false;
4557
4558 /* Is STMT a vectorizable binary/unary operation? */
4559 if (!is_gimple_assign (stmt))
4560 return false;
4561
4562 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4563 return false;
4564
4565 code = gimple_assign_rhs_code (stmt);
4566
4567 /* For pointer addition, we should use the normal plus for
4568 the vector addition. */
4569 if (code == POINTER_PLUS_EXPR)
4570 code = PLUS_EXPR;
4571
4572 /* Support only unary, binary or ternary operations. */
4573 op_type = TREE_CODE_LENGTH (code);
4574 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4575 {
4576 if (dump_enabled_p ())
4577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4578 "num. args = %d (not unary/binary/ternary op).\n",
4579 op_type);
4580 return false;
4581 }
4582
4583 scalar_dest = gimple_assign_lhs (stmt);
4584 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4585
4586 /* Most operations cannot handle bit-precision types without extra
4587 truncations. */
4588 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4589 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4590 /* Exception are bitwise binary operations. */
4591 && code != BIT_IOR_EXPR
4592 && code != BIT_XOR_EXPR
4593 && code != BIT_AND_EXPR)
4594 {
4595 if (dump_enabled_p ())
4596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4597 "bit-precision arithmetic not supported.\n");
4598 return false;
4599 }
4600
ebfd146a 4601 op0 = gimple_assign_rhs1 (stmt);
24ee1384 4602 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 4603 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 4604 {
73fbfcad 4605 if (dump_enabled_p ())
78c60e3d 4606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4607 "use not simple.\n");
ebfd146a
IR
4608 return false;
4609 }
4610 /* If op0 is an external or constant def, use a vector type with
4611 the same size as the output vector type. */
4612 if (!vectype)
4613 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
4614 if (vec_stmt)
4615 gcc_assert (vectype);
4616 if (!vectype)
4617 {
4618 if (dump_enabled_p ())
4619 {
4620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4621 "no vectype for scalar type ");
4622 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4623 TREE_TYPE (op0));
4624 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4625 }
4626
4627 return false;
4628 }
4629
4630 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4631 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4632 if (nunits_out != nunits_in)
4633 return false;
4634
4635 if (op_type == binary_op || op_type == ternary_op)
4636 {
4637 op1 = gimple_assign_rhs2 (stmt);
4638 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4639 &def, &dt[1]))
4640 {
4641 if (dump_enabled_p ())
4642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4643 "use not simple.\n");
4644 return false;
4645 }
4646 }
4647 if (op_type == ternary_op)
4648 {
4649 op2 = gimple_assign_rhs3 (stmt);
4650 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4651 &def, &dt[2]))
4652 {
4653 if (dump_enabled_p ())
4654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4655 "use not simple.\n");
4656 return false;
4657 }
4658 }
4659
4660 if (loop_vinfo)
4661 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4662 else
4663 vf = 1;
4664
4665 /* Multiple types in SLP are handled by creating the appropriate number of
4666 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4667 case of SLP. */
4668 if (slp_node || PURE_SLP_STMT (stmt_info))
4669 ncopies = 1;
4670 else
4671 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4672
4673 gcc_assert (ncopies >= 1);
4674
4675 /* Shifts are handled in vectorizable_shift (). */
4676 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4677 || code == RROTATE_EXPR)
4678 return false;
4679
4680 /* Supportable by target? */
4681
4682 vec_mode = TYPE_MODE (vectype);
4683 if (code == MULT_HIGHPART_EXPR)
4684 {
4685 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4686 icode = LAST_INSN_CODE;
4687 else
4688 icode = CODE_FOR_nothing;
4689 }
4690 else
4691 {
4692 optab = optab_for_tree_code (code, vectype, optab_default);
4693 if (!optab)
4694 {
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "no optab.\n");
4698 return false;
4699 }
4700 icode = (int) optab_handler (optab, vec_mode);
4701 }
4702
4703 if (icode == CODE_FOR_nothing)
4704 {
4705 if (dump_enabled_p ())
4706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4707 "op not supported by target.\n");
4708 /* Check only during analysis. */
4709 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4710 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4711 return false;
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_NOTE, vect_location,
4714 "proceeding using word mode.\n");
4715 }
4716
4717 /* Worthwhile without SIMD support? Check only during analysis. */
4718 if (!VECTOR_MODE_P (vec_mode)
4719 && !vec_stmt
4720 && vf < vect_min_worthwhile_factor (code))
4721 {
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "not worthwhile without SIMD support.\n");
4725 return false;
4726 }
4727
4728 if (!vec_stmt) /* transformation not required. */
4729 {
4730 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_NOTE, vect_location,
4733 "=== vectorizable_operation ===\n");
4734 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4735 return true;
4736 }
4737
4738 /** Transform. **/
4739
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_NOTE, vect_location,
4742 "transform binary/unary operation.\n");
4743
4744 /* Handle def. */
4745 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4746
4747 /* In case the vectorization factor (VF) is bigger than the number
4748 of elements that we can fit in a vectype (nunits), we have to generate
4749 more than one vector stmt - i.e., we need to "unroll" the
4750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4751 from one copy of the vector stmt to the next, in the field
4752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4753 stages to find the correct vector defs to be used when vectorizing
4754 stmts that use the defs of the current stmt. The example below
4755 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4756 we need to create 4 vectorized stmts):
4757
4758 before vectorization:
4759 RELATED_STMT VEC_STMT
4760 S1: x = memref - -
4761 S2: z = x + 1 - -
4762
4763 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4764 there):
4765 RELATED_STMT VEC_STMT
4766 VS1_0: vx0 = memref0 VS1_1 -
4767 VS1_1: vx1 = memref1 VS1_2 -
4768 VS1_2: vx2 = memref2 VS1_3 -
4769 VS1_3: vx3 = memref3 - -
4770 S1: x = load - VS1_0
4771 S2: z = x + 1 - -
4772
4773 step2: vectorize stmt S2 (done here):
4774 To vectorize stmt S2 we first need to find the relevant vector
4775 def for the first operand 'x'. This is, as usual, obtained from
4776 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4777 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4778 relevant vector def 'vx0'. Having found 'vx0' we can generate
4779 the vector stmt VS2_0, and as usual, record it in the
4780 STMT_VINFO_VEC_STMT of stmt S2.
4781 When creating the second copy (VS2_1), we obtain the relevant vector
4782 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4783 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4784 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4785 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4786 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4787 chain of stmts and pointers:
4788 RELATED_STMT VEC_STMT
4789 VS1_0: vx0 = memref0 VS1_1 -
4790 VS1_1: vx1 = memref1 VS1_2 -
4791 VS1_2: vx2 = memref2 VS1_3 -
4792 VS1_3: vx3 = memref3 - -
4793 S1: x = load - VS1_0
4794 VS2_0: vz0 = vx0 + v1 VS2_1 -
4795 VS2_1: vz1 = vx1 + v1 VS2_2 -
4796 VS2_2: vz2 = vx2 + v1 VS2_3 -
4797 VS2_3: vz3 = vx3 + v1 - -
4798 S2: z = x + 1 - VS2_0 */
4799
4800 prev_stmt_info = NULL;
4801 for (j = 0; j < ncopies; j++)
4802 {
4803 /* Handle uses. */
4804 if (j == 0)
4805 {
4806 if (op_type == binary_op || op_type == ternary_op)
4807 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4808 slp_node, -1);
4809 else
4810 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4811 slp_node, -1);
4812 if (op_type == ternary_op)
4813 {
4814 vec_oprnds2.create (1);
4815 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4816 stmt,
4817 NULL));
4818 }
4819 }
4820 else
4821 {
4822 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4823 if (op_type == ternary_op)
4824 {
4825 tree vec_oprnd = vec_oprnds2.pop ();
4826 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4827 vec_oprnd));
4828 }
4829 }
4830
4831 /* Arguments are ready. Create the new vector stmt. */
4832 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4833 {
4834 vop1 = ((op_type == binary_op || op_type == ternary_op)
4835 ? vec_oprnds1[i] : NULL_TREE);
4836 vop2 = ((op_type == ternary_op)
4837 ? vec_oprnds2[i] : NULL_TREE);
4838 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4839 vop0, vop1, vop2);
4840 new_temp = make_ssa_name (vec_dest, new_stmt);
4841 gimple_assign_set_lhs (new_stmt, new_temp);
4842 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4843 if (slp_node)
4844 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4845 }
4846
4847 if (slp_node)
4848 continue;
4849
4850 if (j == 0)
4851 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4852 else
4853 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4854 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
4855 }
4856
4857 vec_oprnds0.release ();
4858 vec_oprnds1.release ();
4859 vec_oprnds2.release ();
4860
4861 return true;
4862}
4863
4864/* A helper function to ensure data reference DR's base alignment
4865 for STMT_INFO. */
4866
4867static void
4868ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4869{
4870 if (!dr->aux)
4871 return;
4872
4873 if (((dataref_aux *)dr->aux)->base_misaligned)
4874 {
4875 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4876 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4877
4878 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4879 DECL_USER_ALIGN (base_decl) = 1;
4880 ((dataref_aux *)dr->aux)->base_misaligned = false;
4881 }
4882}
4883
4884
4885/* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4886 reversal of the vector elements. If that is impossible to do,
4887 return NULL. */
4888
4889static tree
4890perm_mask_for_reverse (tree vectype)
4891{
4892 int i, nunits;
4893 unsigned char *sel;
4894
4895 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4896 sel = XALLOCAVEC (unsigned char, nunits);
4897
4898 for (i = 0; i < nunits; ++i)
4899 sel[i] = nunits - 1 - i;
4900
4901 return vect_gen_perm_mask (vectype, sel);
4902}
4903
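/* For example, for V4SI this builds the selector { 3, 2, 1, 0 }, so

       VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>

   yields the elements of V in reverse order.  */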
4904/* Function vectorizable_store.
4905
4906 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4907 can be vectorized.
4908 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4909 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4910 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4911
4912static bool
4913vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4914 slp_tree slp_node)
4915{
4916 tree scalar_dest;
4917 tree data_ref;
4918 tree op;
4919 tree vec_oprnd = NULL_TREE;
4920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4921 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4922 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4923 tree elem_type;
4924 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4925 struct loop *loop = NULL;
4926 enum machine_mode vec_mode;
4927 tree dummy;
4928 enum dr_alignment_support alignment_support_scheme;
4929 tree def;
4930 gimple def_stmt;
4931 enum vect_def_type dt;
4932 stmt_vec_info prev_stmt_info = NULL;
4933 tree dataref_ptr = NULL_TREE;
4934 tree dataref_offset = NULL_TREE;
4935 gimple ptr_incr = NULL;
4936 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4937 int ncopies;
4938 int j;
4939 gimple next_stmt, first_stmt = NULL;
4940 bool grouped_store = false;
4941 bool store_lanes_p = false;
4942 unsigned int group_size, i;
4943 vec<tree> dr_chain = vNULL;
4944 vec<tree> oprnds = vNULL;
4945 vec<tree> result_chain = vNULL;
4946 bool inv_p;
4947 bool negative = false;
4948 tree offset = NULL_TREE;
4949 vec<tree> vec_oprnds = vNULL;
4950 bool slp = (slp_node != NULL);
4951 unsigned int vec_num;
4952 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4953 tree aggr_type;
4954
4955 if (loop_vinfo)
4956 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
4957
4958 /* Multiple types in SLP are handled by creating the appropriate number of
4959 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4960 case of SLP. */
437f4a00 4961 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4962 ncopies = 1;
4963 else
4964 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4965
4966 gcc_assert (ncopies >= 1);
4967
4968 /* FORNOW. This restriction should be relaxed. */
a70d6342 4969 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 4970 {
73fbfcad 4971 if (dump_enabled_p ())
78c60e3d 4972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4973 "multiple types in nested loop.\n");
ebfd146a
IR
4974 return false;
4975 }
4976
a70d6342 4977 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4978 return false;
4979
8644a673 4980 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4981 return false;
4982
4983 /* Is vectorizable store? */
4984
4985 if (!is_gimple_assign (stmt))
4986 return false;
4987
4988 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
4989 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
4990 && is_pattern_stmt_p (stmt_info))
4991 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 4992 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 4993 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 4994 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
4995 && TREE_CODE (scalar_dest) != COMPONENT_REF
4996 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
4997 && TREE_CODE (scalar_dest) != REALPART_EXPR
4998 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
4999 return false;
5000
5001 gcc_assert (gimple_assign_single_p (stmt));
5002 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
5003 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5004 &def, &dt))
ebfd146a 5005 {
73fbfcad 5006 if (dump_enabled_p ())
78c60e3d 5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5008 "use not simple.\n");
ebfd146a
IR
5009 return false;
5010 }
5011
272c6793 5012 elem_type = TREE_TYPE (vectype);
ebfd146a 5013 vec_mode = TYPE_MODE (vectype);
7b7b1813 5014
ebfd146a
IR
5015 /* FORNOW. In some cases can vectorize even if data-type not supported
5016 (e.g., array initialization with 0). */
947131ba 5017 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5018 return false;
5019
5020 if (!STMT_VINFO_DATA_REF (stmt_info))
5021 return false;
5022
09dfa495
BM
5023 negative =
5024 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5025 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5026 size_zero_node) < 0;
5027 if (negative && ncopies > 1)
a1e53f3f 5028 {
73fbfcad 5029 if (dump_enabled_p ())
78c60e3d 5030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f234d260 5031 "multiple types with negative step.\n");
a1e53f3f
L
5032 return false;
5033 }
5034
09dfa495
BM
5035 if (negative)
5036 {
5037 gcc_assert (!grouped_store);
5038 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5039 if (alignment_support_scheme != dr_aligned
5040 && alignment_support_scheme != dr_unaligned_supported)
5041 {
5042 if (dump_enabled_p ())
5043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f234d260 5044 "negative step but alignment required.\n");
09dfa495
BM
5045 return false;
5046 }
f234d260
BM
5047 if (dt != vect_constant_def
5048 && dt != vect_external_def
5049 && !perm_mask_for_reverse (vectype))
09dfa495
BM
5050 {
5051 if (dump_enabled_p ())
5052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f234d260 5053 "negative step and reversing not supported.\n");
09dfa495
BM
5054 return false;
5055 }
5056 }
5057
0d0293ac 5058 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5059 {
0d0293ac 5060 grouped_store = true;
e14c1050 5061 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
5062 if (!slp && !PURE_SLP_STMT (stmt_info))
5063 {
e14c1050 5064 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
5065 if (vect_store_lanes_supported (vectype, group_size))
5066 store_lanes_p = true;
0d0293ac 5067 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
5068 return false;
5069 }
b8698a0f 5070
ebfd146a
IR
5071 if (first_stmt == stmt)
5072 {
5073 /* STMT is the leader of the group. Check the operands of all the
5074 stmts of the group. */
e14c1050 5075 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
5076 while (next_stmt)
5077 {
5078 gcc_assert (gimple_assign_single_p (next_stmt));
5079 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
5080 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5081 &def_stmt, &def, &dt))
ebfd146a 5082 {
73fbfcad 5083 if (dump_enabled_p ())
78c60e3d 5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5085 "use not simple.\n");
ebfd146a
IR
5086 return false;
5087 }
e14c1050 5088 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5089 }
5090 }
5091 }
5092
5093 if (!vec_stmt) /* transformation not required. */
5094 {
5095 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
5096 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5097 NULL, NULL, NULL);
ebfd146a
IR
5098 return true;
5099 }
5100
5101 /** Transform. **/
5102
c716e67f
XDL
5103 ensure_base_align (stmt_info, dr);
5104
0d0293ac 5105 if (grouped_store)
ebfd146a
IR
5106 {
5107 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5108 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5109
e14c1050 5110 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5111
5112 /* FORNOW */
a70d6342 5113 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5114
5115 /* We vectorize all the stmts of the interleaving group when we
5116 reach the last stmt in the group. */
e14c1050
IR
5117 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5118 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5119 && !slp)
5120 {
5121 *vec_stmt = NULL;
5122 return true;
5123 }
5124
5125 if (slp)
4b5caab7 5126 {
0d0293ac 5127 grouped_store = false;
4b5caab7
IR
5128 /* VEC_NUM is the number of vect stmts to be created for this
5129 group. */
5130 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5131 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 5132 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5133 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5134 }
ebfd146a 5135 else
4b5caab7
IR
5136 /* VEC_NUM is the number of vect stmts to be created for this
5137 group. */
ebfd146a
IR
5138 vec_num = group_size;
5139 }
b8698a0f 5140 else
ebfd146a
IR
5141 {
5142 first_stmt = stmt;
5143 first_dr = dr;
5144 group_size = vec_num = 1;
ebfd146a 5145 }
b8698a0f 5146
73fbfcad 5147 if (dump_enabled_p ())
78c60e3d 5148 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5149 "transform store. ncopies = %d\n", ncopies);
ebfd146a 5150
9771b263
DN
5151 dr_chain.create (group_size);
5152 oprnds.create (group_size);
ebfd146a 5153
720f5239 5154 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 5155 gcc_assert (alignment_support_scheme);
272c6793
RS
5156 /* Targets with store-lane instructions must not require explicit
5157 realignment. */
5158 gcc_assert (!store_lanes_p
5159 || alignment_support_scheme == dr_aligned
5160 || alignment_support_scheme == dr_unaligned_supported);
5161
09dfa495
BM
5162 if (negative)
5163 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5164
272c6793
RS
5165 if (store_lanes_p)
5166 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5167 else
5168 aggr_type = vectype;
ebfd146a
IR
5169
5170 /* In case the vectorization factor (VF) is bigger than the number
5171 of elements that we can fit in a vectype (nunits), we have to generate
5172 more than one vector stmt, i.e., we need to "unroll" the
b8698a0f 5173 vector stmt by a factor VF/nunits. For more details see the documentation of
ebfd146a
IR
5174 vect_get_vec_def_for_copy_stmt. */
5175
0d0293ac 5176 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
5177
5178 S1: &base + 2 = x2
5179 S2: &base = x0
5180 S3: &base + 1 = x1
5181 S4: &base + 3 = x3
5182
5183 We create vectorized stores starting from the base address (the access of
5184 the first stmt in the chain, S2 in the above example) when the last store
5185 stmt of the chain (S4) is reached:
5186
5187 VS1: &base = vx2
5188 VS2: &base + vec_size*1 = vx0
5189 VS3: &base + vec_size*2 = vx1
5190 VS4: &base + vec_size*3 = vx3
5191
5192 Then permutation statements are generated:
5193
3fcc1b55
JJ
5194 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5195 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 5196 ...
b8698a0f 5197
ebfd146a
IR
5198 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5199 (the order of the data-refs in the output of vect_permute_store_chain
5200 corresponds to the order of scalar stmts in the interleaving chain - see
5201 the documentation of vect_permute_store_chain()).
5202
5203 In case of both multiple types and interleaving, above vector stores and
ff802fa1 5204 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 5205 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 5206 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
5207 */
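
  /* Editor's note (worked example, an addition to the comment above): the
     VS5/VS6 masks assume eight elements per vector; in a VEC_PERM_EXPR the
     indices 0-7 select from the first input and 8-15 from the second, so
     {0, 8, 1, 9, 2, 10, 3, 11} interleaves the low halves of the two
     inputs and {4, 12, 5, 13, 6, 14, 7, 15} their high halves, which is
     exactly the element order an interleaved store of two chains needs. */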
5208
5209 prev_stmt_info = NULL;
5210 for (j = 0; j < ncopies; j++)
5211 {
5212 gimple new_stmt;
ebfd146a
IR
5213
5214 if (j == 0)
5215 {
5216 if (slp)
5217 {
5218 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
5219 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5220 NULL, slp_node, -1);
ebfd146a 5221
9771b263 5222 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
5223 }
5224 else
5225 {
b8698a0f
L
5226 /* For interleaved stores we collect vectorized defs for all the
5227 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5228 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
5229 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5230
0d0293ac 5231 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 5232 OPRNDS are of size 1. */
b8698a0f 5233 next_stmt = first_stmt;
ebfd146a
IR
5234 for (i = 0; i < group_size; i++)
5235 {
b8698a0f
L
5236 /* Since gaps are not supported for interleaved stores,
5237 GROUP_SIZE is the exact number of stmts in the chain.
5238 Therefore, NEXT_STMT can't be NULL_TREE. In case there
5239 is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
5240 iteration of the loop will be executed. */
5241 gcc_assert (next_stmt
5242 && gimple_assign_single_p (next_stmt));
5243 op = gimple_assign_rhs1 (next_stmt);
5244
b8698a0f 5245 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 5246 NULL);
9771b263
DN
5247 dr_chain.quick_push (vec_oprnd);
5248 oprnds.quick_push (vec_oprnd);
e14c1050 5249 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5250 }
5251 }
5252
5253 /* We should have caught mismatched types earlier. */
5254 gcc_assert (useless_type_conversion_p (vectype,
5255 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
5256 bool simd_lane_access_p
5257 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5258 if (simd_lane_access_p
5259 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5260 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5261 && integer_zerop (DR_OFFSET (first_dr))
5262 && integer_zerop (DR_INIT (first_dr))
5263 && alias_sets_conflict_p (get_alias_set (aggr_type),
5264 get_alias_set (DR_REF (first_dr))))
5265 {
5266 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5267 dataref_offset = build_int_cst (reference_alias_ptr_type
5268 (DR_REF (first_dr)), 0);
8928eff3 5269 inv_p = false;
74bf76ed
JJ
5270 }
5271 else
5272 dataref_ptr
5273 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5274 simd_lane_access_p ? loop : NULL,
09dfa495 5275 offset, &dummy, gsi, &ptr_incr,
74bf76ed 5276 simd_lane_access_p, &inv_p);
a70d6342 5277 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 5278 }
b8698a0f 5279 else
ebfd146a 5280 {
b8698a0f
L
5281 /* For interleaved stores we created vectorized defs for all the
5282 defs stored in OPRNDS in the previous iteration (previous copy).
5283 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
5284 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5285 next copy.
0d0293ac 5286 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
5287 OPRNDS are of size 1. */
5288 for (i = 0; i < group_size; i++)
5289 {
9771b263 5290 op = oprnds[i];
24ee1384
IR
5291 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5292 &def, &dt);
b8698a0f 5293 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
5294 dr_chain[i] = vec_oprnd;
5295 oprnds[i] = vec_oprnd;
ebfd146a 5296 }
74bf76ed
JJ
5297 if (dataref_offset)
5298 dataref_offset
5299 = int_const_binop (PLUS_EXPR, dataref_offset,
5300 TYPE_SIZE_UNIT (aggr_type));
5301 else
5302 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5303 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
5304 }
5305
272c6793 5306 if (store_lanes_p)
ebfd146a 5307 {
272c6793 5308 tree vec_array;
267d3070 5309
272c6793
RS
5310 /* Combine all the vectors into an array. */
5311 vec_array = create_vector_array (vectype, vec_num);
5312 for (i = 0; i < vec_num; i++)
c2d7ab2a 5313 {
9771b263 5314 vec_oprnd = dr_chain[i];
272c6793 5315 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 5316 }
b8698a0f 5317
272c6793
RS
5318 /* Emit:
5319 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5320 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5321 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5322 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 5323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5324 }
5325 else
5326 {
5327 new_stmt = NULL;
0d0293ac 5328 if (grouped_store)
272c6793 5329 {
b6b9227d
JJ
5330 if (j == 0)
5331 result_chain.create (group_size);
272c6793
RS
5332 /* Permute. */
5333 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5334 &result_chain);
5335 }
c2d7ab2a 5336
272c6793
RS
5337 next_stmt = first_stmt;
5338 for (i = 0; i < vec_num; i++)
5339 {
644ffefd 5340 unsigned align, misalign;
272c6793
RS
5341
5342 if (i > 0)
5343 /* Bump the vector pointer. */
5344 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5345 stmt, NULL_TREE);
5346
5347 if (slp)
9771b263 5348 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
5349 else if (grouped_store)
5350 /* For grouped stores vectorized defs are interleaved in
272c6793 5351 vect_permute_store_chain(). */
9771b263 5352 vec_oprnd = result_chain[i];
272c6793
RS
5353
5354 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
74bf76ed
JJ
5355 dataref_offset
5356 ? dataref_offset
5357 : build_int_cst (reference_alias_ptr_type
5358 (DR_REF (first_dr)), 0));
644ffefd 5359 align = TYPE_ALIGN_UNIT (vectype);
272c6793 5360 if (aligned_access_p (first_dr))
644ffefd 5361 misalign = 0;
272c6793
RS
5362 else if (DR_MISALIGNMENT (first_dr) == -1)
5363 {
5364 TREE_TYPE (data_ref)
5365 = build_aligned_type (TREE_TYPE (data_ref),
5366 TYPE_ALIGN (elem_type));
644ffefd
MJ
5367 align = TYPE_ALIGN_UNIT (elem_type);
5368 misalign = 0;
272c6793
RS
5369 }
5370 else
5371 {
5372 TREE_TYPE (data_ref)
5373 = build_aligned_type (TREE_TYPE (data_ref),
5374 TYPE_ALIGN (elem_type));
644ffefd 5375 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5376 }
74bf76ed
JJ
5377 if (dataref_offset == NULL_TREE)
5378 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5379 misalign);
c2d7ab2a 5380
f234d260
BM
5381 if (negative
5382 && dt != vect_constant_def
5383 && dt != vect_external_def)
09dfa495
BM
5384 {
5385 tree perm_mask = perm_mask_for_reverse (vectype);
5386 tree perm_dest
5387 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5388 vectype);
5389 tree new_temp = make_ssa_name (perm_dest, NULL);
5390
5391 /* Generate the permute statement. */
5392 gimple perm_stmt
5393 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5394 vec_oprnd, vec_oprnd,
5395 perm_mask);
5396 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5397
5398 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5399 vec_oprnd = new_temp;
5400 }
5401
272c6793
RS
5402 /* Arguments are ready. Create the new vector stmt. */
5403 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5404 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5405
5406 if (slp)
5407 continue;
5408
e14c1050 5409 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
5410 if (!next_stmt)
5411 break;
5412 }
ebfd146a 5413 }
1da0876c
RS
5414 if (!slp)
5415 {
5416 if (j == 0)
5417 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5418 else
5419 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5420 prev_stmt_info = vinfo_for_stmt (new_stmt);
5421 }
ebfd146a
IR
5422 }
5423
9771b263
DN
5424 dr_chain.release ();
5425 oprnds.release ();
5426 result_chain.release ();
5427 vec_oprnds.release ();
ebfd146a
IR
5428
5429 return true;
5430}
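
/* Worked example (editor's addition): a scalar loop such as

     for (i = 0; i < n; i++)
       {
         out[2 * i] = x[i];
         out[2 * i + 1] = y[i];
       }

   contains a grouped store with GROUP_SIZE 2; vectorizable_store collects
   the vectorized defs of both stores in DR_CHAIN, interleaves them with
   vect_permute_store_chain (), and emits one vector store per vector in
   the resulting chain, as the interleaving comment above describes. */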
5431
aec7ae7d
JJ
5432/* Given a vector type VECTYPE and a permutation SEL, returns
5433 the VECTOR_CST mask that implements the permutation of the
5434 vector elements. If that is impossible to do, returns NULL. */
a1e53f3f 5435
3fcc1b55
JJ
5436tree
5437vect_gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 5438{
d2a12ae7 5439 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 5440 int i, nunits;
a1e53f3f 5441
22e4dee7 5442 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7
RH
5443
5444 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
a1e53f3f
L
5445 return NULL;
5446
96f9265a
RG
5447 mask_elt_type = lang_hooks.types.type_for_mode
5448 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 5449 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 5450
d2a12ae7 5451 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 5452 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
5453 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5454 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 5455
2635892a 5456 return mask_vec;
a1e53f3f
L
5457}
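
/* Usage sketch (editor's addition; a minimal illustration rather than a
   quote of any caller): selecting the even elements of two N-element
   vectors X and Y could be done as

     unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
     for (i = 0; i < nunits; ++i)
       sel[i] = 2 * i;
     mask_vec = vect_gen_perm_mask (vectype, sel);
     if (mask_vec != NULL_TREE)
       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, dest,
                                                 x, y, mask_vec);

   where a NULL_TREE result means the target cannot perform the
   permutation and the caller must fall back to another strategy. */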
5458
aec7ae7d
JJ
5459/* Given vector variables X and Y that were generated for the scalar
5460 STMT, generate instructions to permute the vector elements of X and Y
5461 using the permutation mask MASK_VEC, insert them at *GSI, and return the
5462 permuted vector variable. */
a1e53f3f
L
5463
5464static tree
aec7ae7d
JJ
5465permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5466 gimple_stmt_iterator *gsi)
a1e53f3f
L
5467{
5468 tree vectype = TREE_TYPE (x);
aec7ae7d 5469 tree perm_dest, data_ref;
a1e53f3f
L
5470 gimple perm_stmt;
5471
acdcd61b 5472 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
aec7ae7d 5473 data_ref = make_ssa_name (perm_dest, NULL);
a1e53f3f
L
5474
5475 /* Generate the permute statement. */
73804b12
RG
5476 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5477 x, y, mask_vec);
a1e53f3f
L
5478 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5479
5480 return data_ref;
5481}
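
/* Usage sketch (editor's addition): the negative-step paths in
   vectorizable_store and vectorizable_load reverse a vector in place via

     perm_mask = perm_mask_for_reverse (vectype);
     new_temp = permute_vec_elements (new_temp, new_temp,
                                      perm_mask, stmt, gsi);

   passing the same vector twice, since a reversal only reads one input. */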
5482
6b916b36
RB
5483/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5484 inserting them on the loop's preheader edge. Returns true if we
5485 were successful in doing so (and thus STMT can then be moved);
5486 otherwise returns false. */
5487
5488static bool
5489hoist_defs_of_uses (gimple stmt, struct loop *loop)
5490{
5491 ssa_op_iter i;
5492 tree op;
5493 bool any = false;
5494
5495 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5496 {
5497 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5498 if (!gimple_nop_p (def_stmt)
5499 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5500 {
5501 /* Make sure we don't need to recurse. While we could do
5502 so in simple cases, when there are more complex use webs
5503 we don't have an easy way to preserve stmt order to fulfil
5504 dependencies within them. */
5505 tree op2;
5506 ssa_op_iter i2;
5507 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5508 {
5509 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5510 if (!gimple_nop_p (def_stmt2)
5511 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5512 return false;
5513 }
5514 any = true;
5515 }
5516 }
5517
5518 if (!any)
5519 return true;
5520
5521 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5522 {
5523 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5524 if (!gimple_nop_p (def_stmt)
5525 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5526 {
5527 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5528 gsi_remove (&gsi, false);
5529 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5530 }
5531 }
5532
5533 return true;
5534}
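
/* Worked example (editor's addition): given an invariant load such as

     for (i = 0; i < n; i++)
       a[i] = *p_3;

   where p_3 is itself defined by a statement inside the loop that only
   uses values defined outside of it, hoist_defs_of_uses moves that
   definition to the preheader so that the caller may then hoist the load
   as well. The function deliberately refuses to recurse through deeper
   use webs. */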
5535
ebfd146a
IR
5536/* vectorizable_load.
5537
b8698a0f
L
5538 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5539 can be vectorized.
5540 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5541 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5542 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5543
5544static bool
5545vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 5546 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
5547{
5548 tree scalar_dest;
5549 tree vec_dest = NULL;
5550 tree data_ref = NULL;
5551 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 5552 stmt_vec_info prev_stmt_info;
ebfd146a 5553 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5554 struct loop *loop = NULL;
ebfd146a 5555 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 5556 bool nested_in_vect_loop = false;
c716e67f 5557 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 5558 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5559 tree elem_type;
ebfd146a 5560 tree new_temp;
947131ba 5561 enum machine_mode mode;
ebfd146a
IR
5562 gimple new_stmt = NULL;
5563 tree dummy;
5564 enum dr_alignment_support alignment_support_scheme;
5565 tree dataref_ptr = NULL_TREE;
74bf76ed 5566 tree dataref_offset = NULL_TREE;
fef4d2b3 5567 gimple ptr_incr = NULL;
ebfd146a
IR
5568 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5569 int ncopies;
a64b9c26 5570 int i, j, group_size, group_gap;
ebfd146a
IR
5571 tree msq = NULL_TREE, lsq;
5572 tree offset = NULL_TREE;
5573 tree realignment_token = NULL_TREE;
5574 gimple phi = NULL;
6e1aa848 5575 vec<tree> dr_chain = vNULL;
0d0293ac 5576 bool grouped_load = false;
272c6793 5577 bool load_lanes_p = false;
ebfd146a 5578 gimple first_stmt;
ebfd146a 5579 bool inv_p;
319e6439 5580 bool negative = false;
ebfd146a
IR
5581 bool compute_in_loop = false;
5582 struct loop *at_loop;
5583 int vec_num;
5584 bool slp = (slp_node != NULL);
5585 bool slp_perm = false;
5586 enum tree_code code;
a70d6342
IR
5587 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5588 int vf;
272c6793 5589 tree aggr_type;
aec7ae7d
JJ
5590 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5591 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5592 int gather_scale = 1;
5593 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
5594
5595 if (loop_vinfo)
5596 {
5597 loop = LOOP_VINFO_LOOP (loop_vinfo);
5598 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5599 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5600 }
5601 else
3533e503 5602 vf = 1;
ebfd146a
IR
5603
5604 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5605 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 5606 case of SLP. */
437f4a00 5607 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5608 ncopies = 1;
5609 else
5610 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5611
5612 gcc_assert (ncopies >= 1);
5613
5614 /* FORNOW. This restriction should be relaxed. */
5615 if (nested_in_vect_loop && ncopies > 1)
5616 {
73fbfcad 5617 if (dump_enabled_p ())
78c60e3d 5618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5619 "multiple types in nested loop.\n");
ebfd146a
IR
5620 return false;
5621 }
5622
a70d6342 5623 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5624 return false;
5625
8644a673 5626 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5627 return false;
5628
5629 /* Is vectorizable load? */
5630 if (!is_gimple_assign (stmt))
5631 return false;
5632
5633 scalar_dest = gimple_assign_lhs (stmt);
5634 if (TREE_CODE (scalar_dest) != SSA_NAME)
5635 return false;
5636
5637 code = gimple_assign_rhs_code (stmt);
5638 if (code != ARRAY_REF
38000232 5639 && code != BIT_FIELD_REF
ebfd146a 5640 && code != INDIRECT_REF
e9dbe7bb
IR
5641 && code != COMPONENT_REF
5642 && code != IMAGPART_EXPR
70f34814 5643 && code != REALPART_EXPR
42373e0b
RG
5644 && code != MEM_REF
5645 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
5646 return false;
5647
5648 if (!STMT_VINFO_DATA_REF (stmt_info))
5649 return false;
5650
7b7b1813 5651 elem_type = TREE_TYPE (vectype);
947131ba 5652 mode = TYPE_MODE (vectype);
ebfd146a
IR
5653
5654 /* FORNOW. In some cases can vectorize even if data-type not supported
5655 (e.g., data copies). */
947131ba 5656 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 5657 {
73fbfcad 5658 if (dump_enabled_p ())
78c60e3d 5659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5660 "Aligned load, but unsupported type.\n");
ebfd146a
IR
5661 return false;
5662 }
5663
ebfd146a 5664 /* Check if the load is a part of an interleaving chain. */
0d0293ac 5665 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5666 {
0d0293ac 5667 grouped_load = true;
ebfd146a 5668 /* FORNOW */
aec7ae7d 5669 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 5670
e14c1050 5671 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
5672 if (!slp && !PURE_SLP_STMT (stmt_info))
5673 {
e14c1050 5674 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
5675 if (vect_load_lanes_supported (vectype, group_size))
5676 load_lanes_p = true;
0d0293ac 5677 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
5678 return false;
5679 }
ebfd146a
IR
5680 }
5681
a1e53f3f 5682
aec7ae7d
JJ
5683 if (STMT_VINFO_GATHER_P (stmt_info))
5684 {
5685 gimple def_stmt;
5686 tree def;
5687 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5688 &gather_off, &gather_scale);
5689 gcc_assert (gather_decl);
24ee1384 5690 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
5691 &def_stmt, &def, &gather_dt,
5692 &gather_off_vectype))
5693 {
73fbfcad 5694 if (dump_enabled_p ())
78c60e3d 5695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5696 "gather index use not simple.\n");
aec7ae7d
JJ
5697 return false;
5698 }
5699 }
7d75abc8 5700 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
14ac6aa2 5701 ;
319e6439
RG
5702 else
5703 {
5704 negative = tree_int_cst_compare (nested_in_vect_loop
5705 ? STMT_VINFO_DR_STEP (stmt_info)
5706 : DR_STEP (dr),
5707 size_zero_node) < 0;
5708 if (negative && ncopies > 1)
5709 {
73fbfcad 5710 if (dump_enabled_p ())
78c60e3d 5711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5712 "multiple types with negative step.\n");
319e6439
RG
5713 return false;
5714 }
5715
5716 if (negative)
5717 {
08940f33
RB
5718 if (grouped_load)
5719 {
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
5722 "negative step for group load not supported"
5723 "\n");
08940f33
RB
5724 return false;
5725 }
319e6439
RG
5726 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5727 if (alignment_support_scheme != dr_aligned
5728 && alignment_support_scheme != dr_unaligned_supported)
5729 {
73fbfcad 5730 if (dump_enabled_p ())
78c60e3d 5731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5732 "negative step but alignment required.\n");
319e6439
RG
5733 return false;
5734 }
5735 if (!perm_mask_for_reverse (vectype))
5736 {
73fbfcad 5737 if (dump_enabled_p ())
78c60e3d 5738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
5739 "negative step and reversing not supported."
5740 "\n");
319e6439
RG
5741 return false;
5742 }
5743 }
7d75abc8 5744 }
aec7ae7d 5745
ebfd146a
IR
5746 if (!vec_stmt) /* transformation not required. */
5747 {
5748 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 5749 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
ebfd146a
IR
5750 return true;
5751 }
5752
73fbfcad 5753 if (dump_enabled_p ())
78c60e3d 5754 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5755 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
5756
5757 /** Transform. **/
5758
c716e67f
XDL
5759 ensure_base_align (stmt_info, dr);
5760
aec7ae7d
JJ
5761 if (STMT_VINFO_GATHER_P (stmt_info))
5762 {
5763 tree vec_oprnd0 = NULL_TREE, op;
5764 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5765 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 5766 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
5767 edge pe = loop_preheader_edge (loop);
5768 gimple_seq seq;
5769 basic_block new_bb;
5770 enum { NARROW, NONE, WIDEN } modifier;
5771 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5772
5773 if (nunits == gather_off_nunits)
5774 modifier = NONE;
5775 else if (nunits == gather_off_nunits / 2)
5776 {
5777 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5778 modifier = WIDEN;
5779
5780 for (i = 0; i < gather_off_nunits; ++i)
5781 sel[i] = i | nunits;
5782
3fcc1b55 5783 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
aec7ae7d
JJ
5784 gcc_assert (perm_mask != NULL_TREE);
5785 }
5786 else if (nunits == gather_off_nunits * 2)
5787 {
5788 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5789 modifier = NARROW;
5790
5791 for (i = 0; i < nunits; ++i)
5792 sel[i] = i < gather_off_nunits
5793 ? i : i + nunits - gather_off_nunits;
5794
3fcc1b55 5795 perm_mask = vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
5796 gcc_assert (perm_mask != NULL_TREE);
5797 ncopies *= 2;
5798 }
5799 else
5800 gcc_unreachable ();
5801
5802 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5803 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5804 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5805 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5806 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5807 scaletype = TREE_VALUE (arglist);
d3c2fee0 5808 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
5809
5810 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5811
5812 ptr = fold_convert (ptrtype, gather_base);
5813 if (!is_gimple_min_invariant (ptr))
5814 {
5815 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5816 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5817 gcc_assert (!new_bb);
5818 }
5819
5820 /* Currently we support only unconditional gather loads,
5821 so mask should be all ones. */
d3c2fee0
AI
5822 if (TREE_CODE (masktype) == INTEGER_TYPE)
5823 mask = build_int_cst (masktype, -1);
5824 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5825 {
5826 mask = build_int_cst (TREE_TYPE (masktype), -1);
5827 mask = build_vector_from_val (masktype, mask);
03b9e8e4 5828 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 5829 }
aec7ae7d
JJ
5830 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5831 {
5832 REAL_VALUE_TYPE r;
5833 long tmp[6];
5834 for (j = 0; j < 6; ++j)
5835 tmp[j] = -1;
5836 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5837 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 5838 mask = build_vector_from_val (masktype, mask);
03b9e8e4 5839 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
5840 }
5841 else
5842 gcc_unreachable ();
aec7ae7d
JJ
5843
5844 scale = build_int_cst (scaletype, gather_scale);
5845
d3c2fee0
AI
5846 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5847 merge = build_int_cst (TREE_TYPE (rettype), 0);
5848 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5849 {
5850 REAL_VALUE_TYPE r;
5851 long tmp[6];
5852 for (j = 0; j < 6; ++j)
5853 tmp[j] = 0;
5854 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5855 merge = build_real (TREE_TYPE (rettype), r);
5856 }
5857 else
5858 gcc_unreachable ();
5859 merge = build_vector_from_val (rettype, merge);
5860 merge = vect_init_vector (stmt, merge, rettype, NULL);
5861
aec7ae7d
JJ
5862 prev_stmt_info = NULL;
5863 for (j = 0; j < ncopies; ++j)
5864 {
5865 if (modifier == WIDEN && (j & 1))
5866 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5867 perm_mask, stmt, gsi);
5868 else if (j == 0)
5869 op = vec_oprnd0
5870 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5871 else
5872 op = vec_oprnd0
5873 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5874
5875 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5876 {
5877 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5878 == TYPE_VECTOR_SUBPARTS (idxtype));
5879 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
5880 var = make_ssa_name (var, NULL);
5881 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5882 new_stmt
5883 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5884 op, NULL_TREE);
5885 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5886 op = var;
5887 }
5888
5889 new_stmt
d3c2fee0 5890 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
5891
5892 if (!useless_type_conversion_p (vectype, rettype))
5893 {
5894 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5895 == TYPE_VECTOR_SUBPARTS (rettype));
5896 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
5897 op = make_ssa_name (var, new_stmt);
5898 gimple_call_set_lhs (new_stmt, op);
5899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5900 var = make_ssa_name (vec_dest, NULL);
5901 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5902 new_stmt
5903 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5904 NULL_TREE);
5905 }
5906 else
5907 {
5908 var = make_ssa_name (vec_dest, new_stmt);
5909 gimple_call_set_lhs (new_stmt, var);
5910 }
5911
5912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5913
5914 if (modifier == NARROW)
5915 {
5916 if ((j & 1) == 0)
5917 {
5918 prev_res = var;
5919 continue;
5920 }
5921 var = permute_vec_elements (prev_res, var,
5922 perm_mask, stmt, gsi);
5923 new_stmt = SSA_NAME_DEF_STMT (var);
5924 }
5925
5926 if (prev_stmt_info == NULL)
5927 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5928 else
5929 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5930 prev_stmt_info = vinfo_for_stmt (new_stmt);
5931 }
5932 return true;
5933 }
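
  /* Editor's note (worked example): gathering, say, four double elements
     with an eight-element int offset vector takes the WIDEN path above;
     even-numbered copies use the offset vector as is, while odd-numbered
     copies first permute its high half into the low positions using the
     mask built from sel[i] = i | nunits. */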
7d75abc8
MM
5934 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5935 {
5936 gimple_stmt_iterator incr_gsi;
5937 bool insert_after;
5938 gimple incr;
5939 tree offvar;
7d75abc8
MM
5940 tree ivstep;
5941 tree running_off;
9771b263 5942 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 5943 gimple_seq stmts = NULL;
14ac6aa2
RB
5944 tree stride_base, stride_step, alias_off;
5945
5946 gcc_assert (!nested_in_vect_loop);
7d75abc8 5947
14ac6aa2
RB
5948 stride_base
5949 = fold_build_pointer_plus
5950 (unshare_expr (DR_BASE_ADDRESS (dr)),
5951 size_binop (PLUS_EXPR,
5952 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 5953 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 5954 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
5955
5956 /* For a load with loop-invariant (but other than power-of-2)
5957 stride (i.e. not a grouped access) like so:
5958
5959 for (i = 0; i < n; i += stride)
5960 ... = array[i];
5961
5962 we generate a new induction variable and new accesses to
5963 form a new vector (or vectors, depending on ncopies):
5964
5965 for (j = 0; ; j += VF*stride)
5966 tmp1 = array[j];
5967 tmp2 = array[j + stride];
5968 ...
5969 vectemp = {tmp1, tmp2, ...}
5970 */
5971
5972 ivstep = stride_step;
5973 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5974 build_int_cst (TREE_TYPE (ivstep), vf));
5975
5976 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5977
5978 create_iv (stride_base, ivstep, NULL,
5979 loop, &incr_gsi, insert_after,
5980 &offvar, NULL);
5981 incr = gsi_stmt (incr_gsi);
5982 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5983
5984 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5985 if (stmts)
5986 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5987
5988 prev_stmt_info = NULL;
5989 running_off = offvar;
14ac6aa2 5990 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
7d75abc8
MM
5991 for (j = 0; j < ncopies; j++)
5992 {
5993 tree vec_inv;
5994
9771b263 5995 vec_alloc (v, nunits);
7d75abc8
MM
5996 for (i = 0; i < nunits; i++)
5997 {
5998 tree newref, newoff;
5999 gimple incr;
14ac6aa2
RB
6000 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6001 running_off, alias_off);
7d75abc8
MM
6002
6003 newref = force_gimple_operand_gsi (gsi, newref, true,
6004 NULL_TREE, true,
6005 GSI_SAME_STMT);
6006 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 6007 newoff = copy_ssa_name (running_off, NULL);
14ac6aa2
RB
6008 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6009 running_off, stride_step);
7d75abc8
MM
6010 vect_finish_stmt_generation (stmt, incr, gsi);
6011
6012 running_off = newoff;
6013 }
6014
6015 vec_inv = build_constructor (vectype, v);
6016 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6017 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7d75abc8
MM
6018
6019 if (j == 0)
6020 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6021 else
6022 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6023 prev_stmt_info = vinfo_for_stmt (new_stmt);
6024 }
6025 return true;
6026 }
aec7ae7d 6027
0d0293ac 6028 if (grouped_load)
ebfd146a 6029 {
e14c1050 6030 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 6031 if (slp
01d8bf07 6032 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
6033 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6034 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 6035
ebfd146a 6036 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
6037 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6038 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6039 ??? But we can only do so if there is exactly one
6040 as we have no way to get at the rest. Leave the CSE
6041 opportunity alone.
6042 ??? With the group load eventually participating
6043 in multiple different permutations (having multiple
6044 slp nodes which refer to the same group) the CSE
6045 would even produce wrong code. See PR56270. */
6046 && !slp)
ebfd146a
IR
6047 {
6048 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6049 return true;
6050 }
6051 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6052 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
6053
6054 /* VEC_NUM is the number of vect stmts to be created for this group. */
6055 if (slp)
6056 {
0d0293ac 6057 grouped_load = false;
ebfd146a 6058 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 6059 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 6060 slp_perm = true;
a64b9c26 6061 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 6062 }
ebfd146a 6063 else
a64b9c26
RB
6064 {
6065 vec_num = group_size;
6066 group_gap = 0;
6067 }
ebfd146a
IR
6068 }
6069 else
6070 {
6071 first_stmt = stmt;
6072 first_dr = dr;
6073 group_size = vec_num = 1;
a64b9c26 6074 group_gap = 0;
ebfd146a
IR
6075 }
6076
720f5239 6077 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6078 gcc_assert (alignment_support_scheme);
272c6793
RS
6079 /* Targets with load-lane instructions must not require explicit
6080 realignment. */
6081 gcc_assert (!load_lanes_p
6082 || alignment_support_scheme == dr_aligned
6083 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
6084
6085 /* In case the vectorization factor (VF) is bigger than the number
6086 of elements that we can fit in a vectype (nunits), we have to generate
6087 more than one vector stmt, i.e., we need to "unroll" the
ff802fa1 6088 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 6089 from one copy of the vector stmt to the next, in the field
ff802fa1 6090 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 6091 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
6092 stmts that use the defs of the current stmt. The example below
6093 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6094 need to create 4 vectorized stmts):
ebfd146a
IR
6095
6096 before vectorization:
6097 RELATED_STMT VEC_STMT
6098 S1: x = memref - -
6099 S2: z = x + 1 - -
6100
6101 step 1: vectorize stmt S1:
6102 We first create the vector stmt VS1_0, and, as usual, record a
6103 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6104 Next, we create the vector stmt VS1_1, and record a pointer to
6105 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 6106 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
6107 stmts and pointers:
6108 RELATED_STMT VEC_STMT
6109 VS1_0: vx0 = memref0 VS1_1 -
6110 VS1_1: vx1 = memref1 VS1_2 -
6111 VS1_2: vx2 = memref2 VS1_3 -
6112 VS1_3: vx3 = memref3 - -
6113 S1: x = load - VS1_0
6114 S2: z = x + 1 - -
6115
b8698a0f
L
6116 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6117 information we recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
6118 stmt S2. */
6119
0d0293ac 6120 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6121
6122 S1: x2 = &base + 2
6123 S2: x0 = &base
6124 S3: x1 = &base + 1
6125 S4: x3 = &base + 3
6126
b8698a0f 6127 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
6128 starting from the access of the first stmt of the chain:
6129
6130 VS1: vx0 = &base
6131 VS2: vx1 = &base + vec_size*1
6132 VS3: vx3 = &base + vec_size*2
6133 VS4: vx4 = &base + vec_size*3
6134
6135 Then permutation statements are generated:
6136
e2c83630
RH
6137 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6138 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
6139 ...
6140
6141 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6142 (the order of the data-refs in the output of vect_permute_load_chain
6143 corresponds to the order of scalar stmts in the interleaving chain - see
6144 the documentation of vect_permute_load_chain()).
6145 The generation of permutation stmts and recording them in
0d0293ac 6146 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 6147
b8698a0f 6148 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
6149 permutation stmts above are created for every copy. The result vector
6150 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6151 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
6152
6153 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6154 on a target that supports unaligned accesses (dr_unaligned_supported)
6155 we generate the following code:
6156 p = initial_addr;
6157 indx = 0;
6158 loop {
6159 p = p + indx * vectype_size;
6160 vec_dest = *(p);
6161 indx = indx + 1;
6162 }
6163
6164 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 6165 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
6166 then generate the following code, in which the data in each iteration is
6167 obtained by two vector loads, one from the previous iteration, and one
6168 from the current iteration:
6169 p1 = initial_addr;
6170 msq_init = *(floor(p1))
6171 p2 = initial_addr + VS - 1;
6172 realignment_token = call target_builtin;
6173 indx = 0;
6174 loop {
6175 p2 = p2 + indx * vectype_size
6176 lsq = *(floor(p2))
6177 vec_dest = realign_load (msq, lsq, realignment_token)
6178 indx = indx + 1;
6179 msq = lsq;
6180 } */
6181
6182 /* If the misalignment remains the same throughout the execution of the
6183 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 6184 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
6185 This can only occur when vectorizing memory accesses in the inner-loop
6186 nested within an outer-loop that is being vectorized. */
6187
d1e4b493 6188 if (nested_in_vect_loop
211bea38 6189 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
6190 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6191 {
6192 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6193 compute_in_loop = true;
6194 }
6195
6196 if ((alignment_support_scheme == dr_explicit_realign_optimized
6197 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 6198 && !compute_in_loop)
ebfd146a
IR
6199 {
6200 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6201 alignment_support_scheme, NULL_TREE,
6202 &at_loop);
6203 if (alignment_support_scheme == dr_explicit_realign_optimized)
6204 {
6205 phi = SSA_NAME_DEF_STMT (msq);
6206 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6207 }
6208 }
6209 else
6210 at_loop = loop;
6211
a1e53f3f
L
6212 if (negative)
6213 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6214
272c6793
RS
6215 if (load_lanes_p)
6216 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6217 else
6218 aggr_type = vectype;
6219
ebfd146a
IR
6220 prev_stmt_info = NULL;
6221 for (j = 0; j < ncopies; j++)
b8698a0f 6222 {
272c6793 6223 /* 1. Create the vector or array pointer update chain. */
ebfd146a 6224 if (j == 0)
74bf76ed
JJ
6225 {
6226 bool simd_lane_access_p
6227 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6228 if (simd_lane_access_p
6229 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6230 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6231 && integer_zerop (DR_OFFSET (first_dr))
6232 && integer_zerop (DR_INIT (first_dr))
6233 && alias_sets_conflict_p (get_alias_set (aggr_type),
6234 get_alias_set (DR_REF (first_dr)))
6235 && (alignment_support_scheme == dr_aligned
6236 || alignment_support_scheme == dr_unaligned_supported))
6237 {
6238 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6239 dataref_offset = build_int_cst (reference_alias_ptr_type
6240 (DR_REF (first_dr)), 0);
8928eff3 6241 inv_p = false;
74bf76ed
JJ
6242 }
6243 else
6244 dataref_ptr
6245 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6246 offset, &dummy, gsi, &ptr_incr,
6247 simd_lane_access_p, &inv_p);
6248 }
6249 else if (dataref_offset)
6250 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6251 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6252 else
272c6793
RS
6253 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6254 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6255
0d0293ac 6256 if (grouped_load || slp_perm)
9771b263 6257 dr_chain.create (vec_num);
5ce1ee7f 6258
272c6793 6259 if (load_lanes_p)
ebfd146a 6260 {
272c6793
RS
6261 tree vec_array;
6262
6263 vec_array = create_vector_array (vectype, vec_num);
6264
6265 /* Emit:
6266 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6267 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6268 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6269 gimple_call_set_lhs (new_stmt, vec_array);
6270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 6271
272c6793
RS
6272 /* Extract each vector into an SSA_NAME. */
6273 for (i = 0; i < vec_num; i++)
ebfd146a 6274 {
272c6793
RS
6275 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6276 vec_array, i);
9771b263 6277 dr_chain.quick_push (new_temp);
272c6793
RS
6278 }
6279
6280 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 6281 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
6282 }
6283 else
6284 {
6285 for (i = 0; i < vec_num; i++)
6286 {
6287 if (i > 0)
6288 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6289 stmt, NULL_TREE);
6290
6291 /* 2. Create the vector-load in the loop. */
6292 switch (alignment_support_scheme)
6293 {
6294 case dr_aligned:
6295 case dr_unaligned_supported:
be1ac4ec 6296 {
644ffefd
MJ
6297 unsigned int align, misalign;
6298
272c6793
RS
6299 data_ref
6300 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
6301 dataref_offset
6302 ? dataref_offset
6303 : build_int_cst (reference_alias_ptr_type
6304 (DR_REF (first_dr)), 0));
644ffefd 6305 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
6306 if (alignment_support_scheme == dr_aligned)
6307 {
6308 gcc_assert (aligned_access_p (first_dr));
644ffefd 6309 misalign = 0;
272c6793
RS
6310 }
6311 else if (DR_MISALIGNMENT (first_dr) == -1)
6312 {
6313 TREE_TYPE (data_ref)
6314 = build_aligned_type (TREE_TYPE (data_ref),
6315 TYPE_ALIGN (elem_type));
644ffefd
MJ
6316 align = TYPE_ALIGN_UNIT (elem_type);
6317 misalign = 0;
272c6793
RS
6318 }
6319 else
6320 {
6321 TREE_TYPE (data_ref)
6322 = build_aligned_type (TREE_TYPE (data_ref),
6323 TYPE_ALIGN (elem_type));
644ffefd 6324 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6325 }
74bf76ed
JJ
6326 if (dataref_offset == NULL_TREE)
6327 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6328 align, misalign);
272c6793 6329 break;
be1ac4ec 6330 }
272c6793 6331 case dr_explicit_realign:
267d3070 6332 {
272c6793
RS
6333 tree ptr, bump;
6334 tree vs_minus_1;
6335
6336 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6337
6338 if (compute_in_loop)
6339 msq = vect_setup_realignment (first_stmt, gsi,
6340 &realignment_token,
6341 dr_explicit_realign,
6342 dataref_ptr, NULL);
6343
070ecdfd 6344 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 6345 new_stmt = gimple_build_assign_with_ops
070ecdfd 6346 (BIT_AND_EXPR, ptr, dataref_ptr,
272c6793
RS
6347 build_int_cst
6348 (TREE_TYPE (dataref_ptr),
6349 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
6350 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6351 data_ref
6352 = build2 (MEM_REF, vectype, ptr,
6353 build_int_cst (reference_alias_ptr_type
6354 (DR_REF (first_dr)), 0));
6355 vec_dest = vect_create_destination_var (scalar_dest,
6356 vectype);
6357 new_stmt = gimple_build_assign (vec_dest, data_ref);
6358 new_temp = make_ssa_name (vec_dest, new_stmt);
6359 gimple_assign_set_lhs (new_stmt, new_temp);
6360 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6361 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6363 msq = new_temp;
6364
6365 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 6366 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
6367 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6368 new_stmt = gimple_build_assign_with_ops
6369 (BIT_AND_EXPR, NULL_TREE, ptr,
6370 build_int_cst
6371 (TREE_TYPE (ptr),
6372 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 6373 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
6374 gimple_assign_set_lhs (new_stmt, ptr);
6375 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6376 data_ref
6377 = build2 (MEM_REF, vectype, ptr,
6378 build_int_cst (reference_alias_ptr_type
6379 (DR_REF (first_dr)), 0));
6380 break;
267d3070 6381 }
272c6793 6382 case dr_explicit_realign_optimized:
070ecdfd 6383 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 6384 new_stmt = gimple_build_assign_with_ops
070ecdfd 6385 (BIT_AND_EXPR, new_temp, dataref_ptr,
272c6793
RS
6386 build_int_cst
6387 (TREE_TYPE (dataref_ptr),
6388 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
6389 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6390 data_ref
6391 = build2 (MEM_REF, vectype, new_temp,
6392 build_int_cst (reference_alias_ptr_type
6393 (DR_REF (first_dr)), 0));
6394 break;
6395 default:
6396 gcc_unreachable ();
6397 }
ebfd146a 6398 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 6399 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
6400 new_temp = make_ssa_name (vec_dest, new_stmt);
6401 gimple_assign_set_lhs (new_stmt, new_temp);
6402 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6403
272c6793
RS
6404 /* 3. Handle explicit realignment if necessary/supported.
6405 Create in loop:
6406 vec_dest = realign_load (msq, lsq, realignment_token) */
6407 if (alignment_support_scheme == dr_explicit_realign_optimized
6408 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 6409 {
272c6793
RS
6410 lsq = gimple_assign_lhs (new_stmt);
6411 if (!realignment_token)
6412 realignment_token = dataref_ptr;
6413 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6414 new_stmt
73804b12
RG
6415 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6416 vec_dest, msq, lsq,
6417 realignment_token);
272c6793
RS
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gcc_assert (!grouped_load);
                  /* If we have versioned for aliasing then we are sure
                     this is a loop invariant load and thus we can insert
                     it on the preheader edge.  */
                  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
                      && hoist_defs_of_uses (stmt, loop))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "hoisting out of the vectorized "
                                           "loop: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                          dump_printf (MSG_NOTE, "\n");
                        }
                      tree tem = copy_ssa_name (scalar_dest, NULL);
                      gsi_insert_on_edge_immediate
                        (loop_preheader_edge (loop),
                         gimple_build_assign (tem,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
                    }
                  else
                    {
                      gimple_stmt_iterator gsi2 = *gsi;
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
                    }
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         bb_vinfo));
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap.  */
          if (slp && group_gap != 0)
            {
              tree bump = size_binop (MULT_EXPR,
                                      TYPE_SIZE_UNIT (elem_type),
                                      size_int (group_gap));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

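/* For illustration (a sketch, not taken from the sources; the SSA names
   are made up): given the scalar GIMPLE statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   the condition "a_1 < b_2" is simple when each of a_1 and b_2 is either
   a constant (INTEGER_CST, REAL_CST or FIXED_CST) or an SSA name with a
   vectorizable definition; *COMP_VECTYPE is then the vector type of
   whichever operand provides one.  */
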
static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
                     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
                                 &lhs_def_stmt, &def, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
                                 &rhs_def_stmt, &def, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector
   variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is
   1, and in the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

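/* A minimal scalar example of a loop this routine handles (a sketch for
   illustration only, kept out of the build on purpose; the function name
   is made up):  */
#if 0
void
example_cond (int *a, int *b, int *r, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* The COND_EXPR below becomes one VEC_COND_EXPR per vector iteration,
       roughly r_vec = VEC_COND_EXPR <a_vec > b_vec, a_vec, b_vec>.  */
    r[i] = a[i] > b[i] ? a[i] : b[i];
}
#endif
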
bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  /* Is this a vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
                            &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be a signed integer type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;

              ops.safe_push (TREE_OPERAND (cond_expr, 0));
              ops.safe_push (TREE_OPERAND (cond_expr, 1));
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                                         vec_oprnds0.pop ());
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                                         vec_oprnds1.pop ());
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = vec_oprnds1[i];
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}

/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from inside an
     SLP instance traversal, so don't analyze pattern stmts instead of the
     original: the pattern stmts are already part of the SLP instance.  */

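  /* For example (a sketch; the SSA names are made up): for a widened
     multiplication the pattern recognizer may attach to the otherwise
     irrelevant original statement

       s_1 = (int) c_2 * (int) c_3;

     a related pattern statement such as

       patt_4 = c_2 w* c_3;

     and it is the pattern statement that gets analyzed in its place.  */
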
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                  dump_printf (MSG_NOTE, "\n");
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                  || relevance == vect_used_in_outer_by_reduction
                  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
          || vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
              || vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  return true;
}

/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their stmt_vec_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
        is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
             vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}

/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}

/* Create the vector holding the stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector holding the stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free the pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                free_stmt_vec_info (gsi_stmt (si));
            }
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}

/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

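/* For instance (a sketch; the concrete modes depend on the target): on a
   target whose preferred SIMD width is 16 bytes, a 4-byte 'int' yields a
   four-element vector type such as V4SI, while SIZE == 8 would yield a
   two-element one.  A request the target cannot honor (no vector mode, or
   fewer than two elements) returns NULL_TREE.  */
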
static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

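/* The vector size (in bytes) currently in use; it stays zero until the
   first successful call to get_vectype_for_scalar_type below fixes it.  */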
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype.

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

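/* E.g. (a sketch; modes are target-dependent): asking for 'float'
   alongside a 16-byte V4SI vector type yields V4SF, since both hold four
   4-byte elements.  */
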
tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and
   operands that are defined by the current iteration of the loop.
   Unsupportable operands are those that are defined by a previous
   iteration of the loop (as is the case in reduction/induction
   computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

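/* For illustration (a sketch; the SSA names are made up): when vectorizing

     a_3 = b_1 + c_2;

   the operand b_1 is simple with *DT == vect_internal_def if its
   definition is inside the loop being vectorized, vect_external_def if it
   is defined outside (e.g. a function argument), and vect_constant_def
   for a literal such as 5.  */
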
bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* An empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

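/* A minimal scalar example of a widening operation (a sketch for
   illustration only, kept out of the build on purpose; the function name
   is made up):  */
#if 0
void
example_widen (short *a, int *r, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* A single-step widening: each vector of shorts is unpacked into two
       vectors of ints via VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR.  */
    r[i] = a[i];
}
#endif
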
bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
              vect1: [res1,res2,res3,res4],
              vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
              vect1: [res1,res3,res5,res7],
              vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
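  /* For instance (a sketch): a char->int conversion is tried as
     char->short->int, so INTERM_TYPES receives the vector short type and
     *MULTI_STEP_CVT becomes 1; each step still uses the same
     VEC_UNPACK_LO/HI code pair, just at the intermediate width.  */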
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

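/* A minimal scalar example of a narrowing operation (a sketch for
   illustration only, kept out of the build on purpose; the function name
   is made up):  */
#if 0
void
example_narrow (int *a, short *r, int n)
{
  int i;
  for (i = 0; i < n; i++)
    /* A single-step narrowing: two vectors of ints are packed into one
       vector of shorts via VEC_PACK_TRUNC_EXPR.  */
    r[i] = (short) a[i];
}
#endif
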
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For a multi-step FIX_TRUNC_EXPR prefer a signed floating to integer
     conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
     more costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}