/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "builtins.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
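
/* Usage sketch (illustrative, not from the original file): during analysis
   the vectorizer typically accumulates body costs in a local vector and only
   commits them to the target model once vectorization still looks plausible:

     stmt_vector_for_cost body_costs;
     body_costs.create (2);
     (void) record_stmt_cost (&body_costs, 1, vector_stmt,
                              stmt_info, 0, vect_body);
     ...
     body_costs.release ();

   Passing a NULL cost vector instead routes the cost straight to the target
   hook via add_stmt_cost; STMT_INFO above stands for whichever statement is
   being costed.  */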

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is not a pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
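
/* For example (hypothetical loop, not from the original sources):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x;   // has a vdef -> relevant (vect_used_in_scope)
         s += b[i];         // S is used after the loop -> *live_p = true
       }

   The store is relevant because it alters memory; the summation is live
   because its result is used outside the loop.  */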

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming a maximum of 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
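
/* Worked example (illustrative, not from the original sources): for a
   two-step promotion (PWR == 1) the loop above charges
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote operations,
   while the corresponding two-step demotion charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2, reflecting that each widening step
   doubles the number of vector stmts needed to cover the same scalars.  */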
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses high and low interleave, or shuffle operations, for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
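
/* Worked example (illustrative, not from the original sources): interleaving
   a group of GROUP_SIZE == 4 stores with NCOPIES == 1 takes
   ceil_log2 (4) == 2 permute rounds over the 4 vectors, i.e.
   nstmts = 1 * 2 * 4 = 8 vec_perm operations charged to the loop body
   above.  */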

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations, or shuffle operations,
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
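
/* Usage sketch (hypothetical operands, not from the original file): to splat
   a loop-invariant scalar X into a vector of type VECTYPE for the stmt
   replacing STMT:

     tree vec_cst = vect_init_vector (stmt, x, vectype, NULL);

   With GSI == NULL the {x,x,...,x} construction is emitted on the loop
   preheader edge (or at the head of the block for basic-block SLP), so it is
   computed only once rather than in every iteration.  */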


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
1515/* Function vect_get_vec_def_for_stmt_copy
1516
ff802fa1 1517 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1518 vectorized stmt to be created (by the caller to this function) is a "copy"
1519 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1520 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1521 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1522 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1523 DT is the type of the vector def VEC_OPRND.
1524
1525 Context:
1526 In case the vectorization factor (VF) is bigger than the number
1527 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1528 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1529 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1530 smallest data-type determines the VF, and as a result, when vectorizing
1531 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1532 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1533 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1534 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1535 which VF=16 and nunits=4, so the number of copies required is 4):
1536
1537 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1538
ebfd146a
IR
1539 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1540 VS1.1: vx.1 = memref1 VS1.2
1541 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1542 VS1.3: vx.3 = memref3
ebfd146a
IR
1543
1544 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1545 VSnew.1: vz1 = vx.1 + ... VSnew.2
1546 VSnew.2: vz2 = vx.2 + ... VSnew.3
1547 VSnew.3: vz3 = vx.3 + ...
1548
1549 The vectorization of S1 is explained in vectorizable_load.
1550 The vectorization of S2:
b8698a0f
L
1551 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1552 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1553 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1554 returns the vector-def 'vx.0'.
1555
b8698a0f
L
1556 To create the remaining copies of the vector-stmt (VSnew.j), this
1557 function is called to get the relevant vector-def for each operand. It is
1558 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1559 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1560
b8698a0f
L
1561 For example, to obtain the vector-def 'vx.1' in order to create the
1562 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1563 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1564 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1565 and return its def ('vx.1').
1566 Overall, to create the above sequence this function will be called 3 times:
1567 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1568 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1569 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1570
1571tree
1572vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1573{
1574 gimple vec_stmt_for_operand;
1575 stmt_vec_info def_stmt_info;
1576
1577 /* Do nothing; can reuse same def. */
8644a673 1578 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1579 return vec_oprnd;
1580
1581 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1582 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1583 gcc_assert (def_stmt_info);
1584 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1585 gcc_assert (vec_stmt_for_operand);
1587 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1588 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1589 else
1590 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1591 return vec_oprnd;
1592}
1593
1594
1595/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1596 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1597
1598static void
b8698a0f 1599vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1600 vec<tree> *vec_oprnds0,
1601 vec<tree> *vec_oprnds1)
ebfd146a 1602{
9771b263 1603 tree vec_oprnd = vec_oprnds0->pop ();
1604
1605 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1606 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1607
9771b263 1608 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1609 {
9771b263 1610 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1611 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1612 vec_oprnds1->quick_push (vec_oprnd);
1613 }
1614}
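
/* For illustration (operand names invented): assuming VEC_OPRNDS0 = {vx.0}
   and VEC_OPRNDS1 = {vy.0} on entry, one call to the helper above leaves
   VEC_OPRNDS0 = {vx.1} and VEC_OPRNDS1 = {vy.1}, each new def obtained by
   following STMT_VINFO_RELATED_STMT from the stmt that defines the previous
   copy, exactly as vect_get_vec_def_for_stmt_copy does for a single
   operand.  */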
1615
1616
1617/* Get vectorized definitions for OP0 and OP1.
1618 REDUC_INDEX is the index of reduction operand in case of reduction,
1619 and -1 otherwise. */
ebfd146a 1620
d092494c 1621void
ebfd146a 1622vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1623 vec<tree> *vec_oprnds0,
1624 vec<tree> *vec_oprnds1,
d092494c 1625 slp_tree slp_node, int reduc_index)
1626{
1627 if (slp_node)
1628 {
1629 int nops = (op1 == NULL_TREE) ? 1 : 2;
1630 auto_vec<tree> ops (nops);
1631 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1632
9771b263 1633 ops.quick_push (op0);
d092494c 1634 if (op1)
9771b263 1635 ops.quick_push (op1);
1636
1637 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1638
37b5ec8f 1639 *vec_oprnds0 = vec_defs[0];
d092494c 1640 if (op1)
37b5ec8f 1641 *vec_oprnds1 = vec_defs[1];
d092494c 1642 }
1643 else
1644 {
1645 tree vec_oprnd;
1646
9771b263 1647 vec_oprnds0->create (1);
b8698a0f 1648 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1649 vec_oprnds0->quick_push (vec_oprnd);
1650
1651 if (op1)
1652 {
9771b263 1653 vec_oprnds1->create (1);
b8698a0f 1654 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1655 vec_oprnds1->quick_push (vec_oprnd);
1656 }
1657 }
1658}
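
/* A usage sketch for the function above (names invented), e.g. when
   vectorizing z = x + y in the non-SLP case:

     vec<tree> vx = vNULL, vy = vNULL;
     vect_get_vec_defs (op_x, op_y, stmt, &vx, &vy, NULL, -1);

   leaves exactly one vector def in each of VX and VY, taken from
   vect_get_vec_def_for_operand; with a non-NULL SLP_NODE the defs for the
   whole group come from vect_get_slp_defs instead.  */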
1659
1660
1661/* Function vect_finish_stmt_generation.
1662
1663 Insert a new stmt. */
1664
1665void
1666vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1667 gimple_stmt_iterator *gsi)
1668{
1669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1670 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1671 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1672
1673 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1674
1675 if (!gsi_end_p (*gsi)
1676 && gimple_has_mem_ops (vec_stmt))
1677 {
1678 gimple at_stmt = gsi_stmt (*gsi);
1679 tree vuse = gimple_vuse (at_stmt);
1680 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1681 {
1682 tree vdef = gimple_vdef (at_stmt);
1683 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1684 /* If we have an SSA vuse and insert a store, update virtual
1685 SSA form to avoid triggering the renamer. Do so only
1686 if we can easily see all uses - which is what almost always
1687 happens with the way vectorized stmts are inserted. */
1688 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1689 && ((is_gimple_assign (vec_stmt)
1690 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1691 || (is_gimple_call (vec_stmt)
1692 && !(gimple_call_flags (vec_stmt)
1693 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1694 {
1695 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1696 gimple_set_vdef (vec_stmt, new_vdef);
1697 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1698 }
1699 }
1700 }
1701 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1702
b8698a0f 1703 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1704 bb_vinfo));
ebfd146a 1705
73fbfcad 1706 if (dump_enabled_p ())
ebfd146a 1707 {
1708 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1709 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1710 }
1711
ad885386 1712 gimple_set_location (vec_stmt, gimple_location (stmt));
1713
1714 /* While EH edges will generally prevent vectorization, stmt might
1715 e.g. be in a must-not-throw region. Ensure newly created stmts
1716 that could throw are part of the same region. */
1717 int lp_nr = lookup_stmt_eh_lp (stmt);
1718 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1719 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1720}
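
/* To illustrate the virtual operand update above (SSA names invented):
   inserting a vectorized store VEC_STMT before an AT_STMT of the form

     # .MEM_5 = VDEF <.MEM_3>
     at_stmt: *p_7 = ...;

   gives VEC_STMT the vuse .MEM_3 and a fresh vdef .MEM_9, and rewrites
   AT_STMT's vuse to .MEM_9, so virtual SSA form stays correct without
   invoking the renamer.  */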
1721
1722/* Checks if CALL can be vectorized in type VECTYPE. Returns
1723 a function declaration if the target has a vectorized version
1724 of the function, or NULL_TREE if the function cannot be vectorized. */
1725
1726tree
538dd0b7 1727vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1728{
1729 tree fndecl = gimple_call_fndecl (call);
1730
1731 /* We only handle functions that do not read or clobber memory -- i.e.
1732 const or novops ones. */
1733 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1734 return NULL_TREE;
1735
1736 if (!fndecl
1737 || TREE_CODE (fndecl) != FUNCTION_DECL
1738 || !DECL_BUILT_IN (fndecl))
1739 return NULL_TREE;
1740
62f7fd21 1741 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1742 vectype_in);
1743}
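
/* A usage sketch (the concrete types are purely illustrative): for a call
   to, say, __builtin_sinf with a V4SF result and argument,

     fndecl = vectorizable_function (call, v4sf_type, v4sf_type);

   hands back the target's vectorized sinf via the
   builtin_vectorized_function hook, or NULL_TREE; any call that reads or
   clobbers memory is rejected before the hook is consulted.  */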
1744
1745
1746static tree permute_vec_elements (tree, tree, tree, gimple,
1747 gimple_stmt_iterator *);
1748
1749
1750/* Function vectorizable_mask_load_store.
1751
1752 Check if STMT performs a conditional load or store that can be vectorized.
1753 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1754 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1755 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1756
1757static bool
1758vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1759 gimple *vec_stmt, slp_tree slp_node)
1760{
1761 tree vec_dest = NULL;
1762 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1763 stmt_vec_info prev_stmt_info;
1764 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1765 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1766 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1767 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1768 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1769 tree elem_type;
1770 gimple new_stmt;
1771 tree dummy;
1772 tree dataref_ptr = NULL_TREE;
1773 gimple ptr_incr;
1774 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1775 int ncopies;
1776 int i, j;
1777 bool inv_p;
1778 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1779 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1780 int gather_scale = 1;
1781 enum vect_def_type gather_dt = vect_unknown_def_type;
1782 bool is_store;
1783 tree mask;
1784 gimple def_stmt;
1785 tree def;
1786 enum vect_def_type dt;
1787
1788 if (slp_node != NULL)
1789 return false;
1790
1791 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1792 gcc_assert (ncopies >= 1);
1793
1794 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1795 mask = gimple_call_arg (stmt, 2);
1796 if (TYPE_PRECISION (TREE_TYPE (mask))
1797 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1798 return false;
1799
1800 /* FORNOW. This restriction should be relaxed. */
1801 if (nested_in_vect_loop && ncopies > 1)
1802 {
1803 if (dump_enabled_p ())
1804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1805 "multiple types in nested loop.");
1806 return false;
1807 }
1808
1809 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1810 return false;
1811
1812 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1813 return false;
1814
1815 if (!STMT_VINFO_DATA_REF (stmt_info))
1816 return false;
1817
1818 elem_type = TREE_TYPE (vectype);
1819
1820 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1821 return false;
1822
1823 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1824 return false;
1825
1826 if (STMT_VINFO_GATHER_P (stmt_info))
1827 {
1828 gimple def_stmt;
1829 tree def;
1830 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1831 &gather_off, &gather_scale);
1832 gcc_assert (gather_decl);
1833 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1834 &def_stmt, &def, &gather_dt,
1835 &gather_off_vectype))
1836 {
1837 if (dump_enabled_p ())
1838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1839 "gather index use not simple.");
1840 return false;
1841 }
1842
1843 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1844 tree masktype
1845 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1846 if (TREE_CODE (masktype) == INTEGER_TYPE)
1847 {
1848 if (dump_enabled_p ())
1849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1850 "masked gather with integer mask not supported.");
1851 return false;
1852 }
1853 }
1854 else if (tree_int_cst_compare (nested_in_vect_loop
1855 ? STMT_VINFO_DR_STEP (stmt_info)
1856 : DR_STEP (dr), size_zero_node) <= 0)
1857 return false;
1858 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1859 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1860 return false;
1861
1862 if (TREE_CODE (mask) != SSA_NAME)
1863 return false;
1864
1865 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1866 &def_stmt, &def, &dt))
1867 return false;
1868
1869 if (is_store)
1870 {
1871 tree rhs = gimple_call_arg (stmt, 3);
1872 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1873 &def_stmt, &def, &dt))
1874 return false;
1875 }
1876
1877 if (!vec_stmt) /* transformation not required. */
1878 {
1879 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1880 if (is_store)
1881 vect_model_store_cost (stmt_info, ncopies, false, dt,
1882 NULL, NULL, NULL);
1883 else
1884 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1885 return true;
1886 }
1887
1888 /** Transform. **/
1889
1890 if (STMT_VINFO_GATHER_P (stmt_info))
1891 {
1892 tree vec_oprnd0 = NULL_TREE, op;
1893 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1894 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 1895 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 1896 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 1897 tree mask_perm_mask = NULL_TREE;
1898 edge pe = loop_preheader_edge (loop);
1899 gimple_seq seq;
1900 basic_block new_bb;
1901 enum { NARROW, NONE, WIDEN } modifier;
1902 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1903
1904 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1905 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1906 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1907 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1908 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1909 scaletype = TREE_VALUE (arglist);
1910 gcc_checking_assert (types_compatible_p (srctype, rettype)
1911 && types_compatible_p (srctype, masktype));
1912
1913 if (nunits == gather_off_nunits)
1914 modifier = NONE;
1915 else if (nunits == gather_off_nunits / 2)
1916 {
1917 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1918 modifier = WIDEN;
1919
1920 for (i = 0; i < gather_off_nunits; ++i)
1921 sel[i] = i | nunits;
1922
557be5a8 1923 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1924 }
1925 else if (nunits == gather_off_nunits * 2)
1926 {
1927 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1928 modifier = NARROW;
1929
1930 for (i = 0; i < nunits; ++i)
1931 sel[i] = i < gather_off_nunits
1932 ? i : i + nunits - gather_off_nunits;
1933
557be5a8 1934 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5ce9450f 1935 ncopies *= 2;
1936 for (i = 0; i < nunits; ++i)
1937 sel[i] = i | gather_off_nunits;
557be5a8 1938 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1939 }
1940 else
1941 gcc_unreachable ();
1942
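/* A worked example of the WIDEN selector above, assuming nunits == 4 and
   gather_off_nunits == 8: sel = { 4, 5, 6, 7, 4, 5, 6, 7 }, so the
   odd-numbered copies permute the upper half of the (wider) offset vector
   into place while the even copies use it unpermuted.  For NARROW the
   roles flip: each pair of gathered result vectors is combined into one
   vector of VECTYPE instead.  */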
1943 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1944
1945 ptr = fold_convert (ptrtype, gather_base);
1946 if (!is_gimple_min_invariant (ptr))
1947 {
1948 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1949 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1950 gcc_assert (!new_bb);
1951 }
1952
1953 scale = build_int_cst (scaletype, gather_scale);
1954
1955 prev_stmt_info = NULL;
1956 for (j = 0; j < ncopies; ++j)
1957 {
1958 if (modifier == WIDEN && (j & 1))
1959 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1960 perm_mask, stmt, gsi);
1961 else if (j == 0)
1962 op = vec_oprnd0
1963 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1964 else
1965 op = vec_oprnd0
1966 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1967
1968 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1969 {
1970 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1971 == TYPE_VECTOR_SUBPARTS (idxtype));
1972 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
b731b390 1973 var = make_ssa_name (var);
1974 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1975 new_stmt
0d0e4a03 1976 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1978 op = var;
1979 }
1980
1981 if (mask_perm_mask && (j & 1))
1982 mask_op = permute_vec_elements (mask_op, mask_op,
1983 mask_perm_mask, stmt, gsi);
1984 else
1985 {
1986 if (j == 0)
1987 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1988 else
1989 {
1990 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1991 &def_stmt, &def, &dt);
1992 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1993 }
5ce9450f 1994
1995 mask_op = vec_mask;
1996 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1997 {
1998 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1999 == TYPE_VECTOR_SUBPARTS (masktype));
2000 var = vect_get_new_vect_var (masktype, vect_simple_var,
2001 NULL);
b731b390 2002 var = make_ssa_name (var);
2003 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2004 new_stmt
0d0e4a03 2005 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2006 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2007 mask_op = var;
2008 }
2009 }
2010
2011 new_stmt
2012 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2013 scale);
2014
2015 if (!useless_type_conversion_p (vectype, rettype))
2016 {
2017 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2018 == TYPE_VECTOR_SUBPARTS (rettype));
2019 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2020 op = make_ssa_name (var, new_stmt);
2021 gimple_call_set_lhs (new_stmt, op);
2022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 2023 var = make_ssa_name (vec_dest);
5ce9450f 2024 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
0d0e4a03 2025 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2026 }
2027 else
2028 {
2029 var = make_ssa_name (vec_dest, new_stmt);
2030 gimple_call_set_lhs (new_stmt, var);
2031 }
2032
2033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2034
2035 if (modifier == NARROW)
2036 {
2037 if ((j & 1) == 0)
2038 {
2039 prev_res = var;
2040 continue;
2041 }
2042 var = permute_vec_elements (prev_res, var,
2043 perm_mask, stmt, gsi);
2044 new_stmt = SSA_NAME_DEF_STMT (var);
2045 }
2046
2047 if (prev_stmt_info == NULL)
2048 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2049 else
2050 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2051 prev_stmt_info = vinfo_for_stmt (new_stmt);
2052 }
2053
2054 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2055 from the IL. */
2056 tree lhs = gimple_call_lhs (stmt);
2057 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2058 set_vinfo_for_stmt (new_stmt, stmt_info);
2059 set_vinfo_for_stmt (stmt, NULL);
2060 STMT_VINFO_STMT (stmt_info) = new_stmt;
2061 gsi_replace (gsi, new_stmt, true);
2062 return true;
2063 }
2064 else if (is_store)
2065 {
2066 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2067 prev_stmt_info = NULL;
2068 for (i = 0; i < ncopies; i++)
2069 {
2070 unsigned align, misalign;
2071
2072 if (i == 0)
2073 {
2074 tree rhs = gimple_call_arg (stmt, 3);
2075 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2076 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
 2077 /* We should have caught mismatched types earlier. */
2078 gcc_assert (useless_type_conversion_p (vectype,
2079 TREE_TYPE (vec_rhs)));
2080 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2081 NULL_TREE, &dummy, gsi,
2082 &ptr_incr, false, &inv_p);
2083 gcc_assert (!inv_p);
2084 }
2085 else
2086 {
2087 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2088 &def, &dt);
2089 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2090 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2091 &def, &dt);
2092 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2093 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2094 TYPE_SIZE_UNIT (vectype));
2095 }
2096
2097 align = TYPE_ALIGN_UNIT (vectype);
2098 if (aligned_access_p (dr))
2099 misalign = 0;
2100 else if (DR_MISALIGNMENT (dr) == -1)
2101 {
2102 align = TYPE_ALIGN_UNIT (elem_type);
2103 misalign = 0;
2104 }
2105 else
2106 misalign = DR_MISALIGNMENT (dr);
2107 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2108 misalign);
2109 new_stmt
2110 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2111 gimple_call_arg (stmt, 1),
2112 vec_mask, vec_rhs);
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114 if (i == 0)
2115 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2116 else
2117 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2118 prev_stmt_info = vinfo_for_stmt (new_stmt);
2119 }
2120 }
2121 else
2122 {
2123 tree vec_mask = NULL_TREE;
2124 prev_stmt_info = NULL;
2125 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2126 for (i = 0; i < ncopies; i++)
2127 {
2128 unsigned align, misalign;
2129
2130 if (i == 0)
2131 {
2132 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2133 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2134 NULL_TREE, &dummy, gsi,
2135 &ptr_incr, false, &inv_p);
2136 gcc_assert (!inv_p);
2137 }
2138 else
2139 {
2140 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2141 &def, &dt);
2142 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2143 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2144 TYPE_SIZE_UNIT (vectype));
2145 }
2146
2147 align = TYPE_ALIGN_UNIT (vectype);
2148 if (aligned_access_p (dr))
2149 misalign = 0;
2150 else if (DR_MISALIGNMENT (dr) == -1)
2151 {
2152 align = TYPE_ALIGN_UNIT (elem_type);
2153 misalign = 0;
2154 }
2155 else
2156 misalign = DR_MISALIGNMENT (dr);
2157 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2158 misalign);
2159 new_stmt
2160 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2161 gimple_call_arg (stmt, 1),
2162 vec_mask);
b731b390 2163 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2165 if (i == 0)
2166 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2167 else
2168 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2169 prev_stmt_info = vinfo_for_stmt (new_stmt);
2170 }
2171 }
2172
2173 if (!is_store)
2174 {
2175 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2176 from the IL. */
2177 tree lhs = gimple_call_lhs (stmt);
2178 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2179 set_vinfo_for_stmt (new_stmt, stmt_info);
2180 set_vinfo_for_stmt (stmt, NULL);
2181 STMT_VINFO_STMT (stmt_info) = new_stmt;
2182 gsi_replace (gsi, new_stmt, true);
2183 }
2184
2185 return true;
2186}
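
/* For illustration, the net effect of the transform above on a masked
   load when VF == nunits (so ncopies == 1; all names invented):

     before:  x_3 = MASK_LOAD (p_5, align, mask_7);
     after:   vect_x_9 = MASK_LOAD (vectp_8, align, vect_mask_6);
              x_3 = 0;

   the scalar call is replaced by a harmless zero assignment because its
   result is already mapped to the vector stmt, and would otherwise linger
   in the IL even under -fno-tree-dce.  */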
2187
2188
2189/* Function vectorizable_call.
2190
538dd0b7 2191 Check if GS performs a function call that can be vectorized.
b8698a0f 2192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2195
2196static bool
538dd0b7 2197vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
190c2236 2198 slp_tree slp_node)
ebfd146a 2199{
538dd0b7 2200 gcall *stmt;
2201 tree vec_dest;
2202 tree scalar_dest;
2203 tree op, type;
2204 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2205 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2206 tree vectype_out, vectype_in;
2207 int nunits_in;
2208 int nunits_out;
2209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2210 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 2211 tree fndecl, new_temp, def, rhs_type;
ebfd146a 2212 gimple def_stmt;
2213 enum vect_def_type dt[3]
2214 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 2215 gimple new_stmt = NULL;
ebfd146a 2216 int ncopies, j;
6e1aa848 2217 vec<tree> vargs = vNULL;
2218 enum { NARROW, NONE, WIDEN } modifier;
2219 size_t i, nargs;
9d5e7640 2220 tree lhs;
ebfd146a 2221
190c2236 2222 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2223 return false;
2224
8644a673 2225 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2226 return false;
2227
2228 /* Is GS a vectorizable call? */
2229 stmt = dyn_cast <gcall *> (gs);
2230 if (!stmt)
2231 return false;
2232
2233 if (gimple_call_internal_p (stmt)
2234 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2235 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2236 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2237 slp_node);
2238
2239 if (gimple_call_lhs (stmt) == NULL_TREE
2240 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2241 return false;
2242
0136f8f0 2243 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2244
2245 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2246
2247 /* Process function arguments. */
2248 rhs_type = NULL_TREE;
b690cc0f 2249 vectype_in = NULL_TREE;
2250 nargs = gimple_call_num_args (stmt);
2251
 2252 /* Bail out if the function has more than three arguments; we do not have
2253 interesting builtin functions to vectorize with more than two arguments
2254 except for fma. No arguments is also not good. */
2255 if (nargs == 0 || nargs > 3)
2256 return false;
2257
2258 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2259 if (gimple_call_internal_p (stmt)
2260 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2261 {
2262 nargs = 0;
2263 rhs_type = unsigned_type_node;
2264 }
2265
2266 for (i = 0; i < nargs; i++)
2267 {
2268 tree opvectype;
2269
2270 op = gimple_call_arg (stmt, i);
2271
2272 /* We can only handle calls with arguments of the same type. */
2273 if (rhs_type
8533c9d8 2274 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2275 {
73fbfcad 2276 if (dump_enabled_p ())
78c60e3d 2277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2278 "argument types differ.\n");
2279 return false;
2280 }
2281 if (!rhs_type)
2282 rhs_type = TREE_TYPE (op);
ebfd146a 2283
24ee1384 2284 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 2285 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 2286 {
73fbfcad 2287 if (dump_enabled_p ())
78c60e3d 2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2289 "use not simple.\n");
2290 return false;
2291 }
ebfd146a 2292
2293 if (!vectype_in)
2294 vectype_in = opvectype;
2295 else if (opvectype
2296 && opvectype != vectype_in)
2297 {
73fbfcad 2298 if (dump_enabled_p ())
78c60e3d 2299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2300 "argument vector types differ.\n");
2301 return false;
2302 }
2303 }
2304 /* If all arguments are external or constant defs use a vector type with
2305 the same size as the output vector type. */
ebfd146a 2306 if (!vectype_in)
b690cc0f 2307 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2308 if (vec_stmt)
2309 gcc_assert (vectype_in);
2310 if (!vectype_in)
2311 {
73fbfcad 2312 if (dump_enabled_p ())
7d8930a0 2313 {
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "no vectype for scalar type ");
2316 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2317 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2318 }
2319
2320 return false;
2321 }
2322
2323 /* FORNOW */
2324 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2325 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2326 if (nunits_in == nunits_out / 2)
2327 modifier = NARROW;
2328 else if (nunits_out == nunits_in)
2329 modifier = NONE;
2330 else if (nunits_out == nunits_in / 2)
2331 modifier = WIDEN;
2332 else
2333 return false;
2334
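/* E.g. (illustrative types): a float -> double call with nunits_in == 4
   and nunits_out == 2 hits nunits_out == nunits_in / 2, i.e. WIDEN, while
   a double -> float call (nunits_in == 2, nunits_out == 4) is NARROW and
   consumes two input vectors per vectorized call.  */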
2335 /* For now, we only vectorize functions if a target specific builtin
2336 is available. TODO -- in some cases, it might be profitable to
2337 insert the calls for pieces of the vector, in order to be able
2338 to vectorize other operations in the loop. */
2339 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2340 if (fndecl == NULL_TREE)
2341 {
2342 if (gimple_call_internal_p (stmt)
2343 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2344 && !slp_node
2345 && loop_vinfo
2346 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2347 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2349 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2350 {
2351 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2352 { 0, 1, 2, ... vf - 1 } vector. */
2353 gcc_assert (nargs == 0);
2354 }
2355 else
2356 {
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2359 "function is not vectorizable.\n");
2360 return false;
2361 }
2362 }
2363
5006671f 2364 gcc_assert (!gimple_vuse (stmt));
ebfd146a 2365
2366 if (slp_node || PURE_SLP_STMT (stmt_info))
2367 ncopies = 1;
2368 else if (modifier == NARROW)
2369 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2370 else
2371 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2372
2373 /* Sanity check: make sure that at least one copy of the vectorized stmt
2374 needs to be generated. */
2375 gcc_assert (ncopies >= 1);
2376
2377 if (!vec_stmt) /* transformation not required. */
2378 {
2379 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2382 "\n");
c3e7ee41 2383 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2384 return true;
2385 }
2386
2387 /** Transform. **/
2388
73fbfcad 2389 if (dump_enabled_p ())
e645e942 2390 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2391
2392 /* Handle def. */
2393 scalar_dest = gimple_call_lhs (stmt);
2394 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2395
2396 prev_stmt_info = NULL;
2397 switch (modifier)
2398 {
2399 case NONE:
2400 for (j = 0; j < ncopies; ++j)
2401 {
2402 /* Build argument list for the vectorized call. */
2403 if (j == 0)
9771b263 2404 vargs.create (nargs);
ebfd146a 2405 else
9771b263 2406 vargs.truncate (0);
ebfd146a 2407
2408 if (slp_node)
2409 {
ef062b13 2410 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2411 vec<tree> vec_oprnds0;
2412
2413 for (i = 0; i < nargs; i++)
9771b263 2414 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2415 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2416 vec_oprnds0 = vec_defs[0];
2417
2418 /* Arguments are ready. Create the new vector stmt. */
9771b263 2419 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2420 {
2421 size_t k;
2422 for (k = 0; k < nargs; k++)
2423 {
37b5ec8f 2424 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2425 vargs[k] = vec_oprndsk[i];
2426 }
2427 new_stmt = gimple_build_call_vec (fndecl, vargs);
2428 new_temp = make_ssa_name (vec_dest, new_stmt);
2429 gimple_call_set_lhs (new_stmt, new_temp);
2430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2431 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2432 }
2433
2434 for (i = 0; i < nargs; i++)
2435 {
37b5ec8f 2436 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2437 vec_oprndsi.release ();
190c2236 2438 }
2439 continue;
2440 }
2441
2442 for (i = 0; i < nargs; i++)
2443 {
2444 op = gimple_call_arg (stmt, i);
2445 if (j == 0)
2446 vec_oprnd0
2447 = vect_get_vec_def_for_operand (op, stmt, NULL);
2448 else
2449 {
2450 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2451 vec_oprnd0
2452 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2453 }
ebfd146a 2454
9771b263 2455 vargs.quick_push (vec_oprnd0);
2456 }
2457
2458 if (gimple_call_internal_p (stmt)
2459 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2460 {
2461 tree *v = XALLOCAVEC (tree, nunits_out);
2462 int k;
2463 for (k = 0; k < nunits_out; ++k)
2464 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2465 tree cst = build_vector (vectype_out, v);
2466 tree new_var
2467 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2468 gimple init_stmt = gimple_build_assign (new_var, cst);
2469 new_temp = make_ssa_name (new_var, init_stmt);
2470 gimple_assign_set_lhs (init_stmt, new_temp);
2471 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2472 new_temp = make_ssa_name (vec_dest);
2473 new_stmt = gimple_build_assign (new_temp,
2474 gimple_assign_lhs (init_stmt));
2475 }
2476 else
2477 {
2478 new_stmt = gimple_build_call_vec (fndecl, vargs);
2479 new_temp = make_ssa_name (vec_dest, new_stmt);
2480 gimple_call_set_lhs (new_stmt, new_temp);
2481 }
2482 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2483
2484 if (j == 0)
2485 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2486 else
2487 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2488
2489 prev_stmt_info = vinfo_for_stmt (new_stmt);
2490 }
2491
2492 break;
2493
2494 case NARROW:
2495 for (j = 0; j < ncopies; ++j)
2496 {
2497 /* Build argument list for the vectorized call. */
2498 if (j == 0)
9771b263 2499 vargs.create (nargs * 2);
ebfd146a 2500 else
9771b263 2501 vargs.truncate (0);
ebfd146a 2502
2503 if (slp_node)
2504 {
ef062b13 2505 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2506 vec<tree> vec_oprnds0;
2507
2508 for (i = 0; i < nargs; i++)
9771b263 2509 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2510 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2511 vec_oprnds0 = vec_defs[0];
2512
2513 /* Arguments are ready. Create the new vector stmt. */
9771b263 2514 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2515 {
2516 size_t k;
9771b263 2517 vargs.truncate (0);
2518 for (k = 0; k < nargs; k++)
2519 {
37b5ec8f 2520 vec<tree> vec_oprndsk = vec_defs[k];
2521 vargs.quick_push (vec_oprndsk[i]);
2522 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236
JJ
2523 }
2524 new_stmt = gimple_build_call_vec (fndecl, vargs);
2525 new_temp = make_ssa_name (vec_dest, new_stmt);
2526 gimple_call_set_lhs (new_stmt, new_temp);
2527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2529 }
2530
2531 for (i = 0; i < nargs; i++)
2532 {
37b5ec8f 2533 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2534 vec_oprndsi.release ();
190c2236 2535 }
2536 continue;
2537 }
2538
ebfd146a
IR
2539 for (i = 0; i < nargs; i++)
2540 {
2541 op = gimple_call_arg (stmt, i);
2542 if (j == 0)
2543 {
2544 vec_oprnd0
2545 = vect_get_vec_def_for_operand (op, stmt, NULL);
2546 vec_oprnd1
63827fb8 2547 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2548 }
2549 else
2550 {
336ecb65 2551 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2552 vec_oprnd0
63827fb8 2553 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2554 vec_oprnd1
63827fb8 2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556 }
2557
2558 vargs.quick_push (vec_oprnd0);
2559 vargs.quick_push (vec_oprnd1);
2560 }
2561
2562 new_stmt = gimple_build_call_vec (fndecl, vargs);
2563 new_temp = make_ssa_name (vec_dest, new_stmt);
2564 gimple_call_set_lhs (new_stmt, new_temp);
2565 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2566
2567 if (j == 0)
2568 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2569 else
2570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2571
2572 prev_stmt_info = vinfo_for_stmt (new_stmt);
2573 }
2574
2575 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2576
2577 break;
2578
2579 case WIDEN:
2580 /* No current target implements this case. */
2581 return false;
2582 }
2583
9771b263 2584 vargs.release ();
ebfd146a 2585
2586 /* The call in STMT might prevent it from being removed in dce.
2587 We however cannot remove it here, due to the way the ssa name
2588 it defines is mapped to the new definition. So just replace
2589 rhs of the statement with something harmless. */
2590
2591 if (slp_node)
2592 return true;
2593
ebfd146a 2594 type = TREE_TYPE (scalar_dest);
2595 if (is_pattern_stmt_p (stmt_info))
2596 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2597 else
2598 lhs = gimple_call_lhs (stmt);
2599 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2600 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2601 set_vinfo_for_stmt (stmt, NULL);
2602 STMT_VINFO_STMT (stmt_info) = new_stmt;
2603 gsi_replace (gsi, new_stmt, false);
2604
2605 return true;
2606}
2607
2608
2609struct simd_call_arg_info
2610{
2611 tree vectype;
2612 tree op;
2613 enum vect_def_type dt;
2614 HOST_WIDE_INT linear_step;
2615 unsigned int align;
2616};
2617
2618/* Function vectorizable_simd_clone_call.
2619
2620 Check if STMT performs a function call that can be vectorized
2621 by calling a simd clone of the function.
2622 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2623 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2624 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2625
2626static bool
2627vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2628 gimple *vec_stmt, slp_tree slp_node)
2629{
2630 tree vec_dest;
2631 tree scalar_dest;
2632 tree op, type;
2633 tree vec_oprnd0 = NULL_TREE;
2634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2635 tree vectype;
2636 unsigned int nunits;
2637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2638 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2639 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2640 tree fndecl, new_temp, def;
2641 gimple def_stmt;
2642 gimple new_stmt = NULL;
2643 int ncopies, j;
2644 vec<simd_call_arg_info> arginfo = vNULL;
2645 vec<tree> vargs = vNULL;
2646 size_t i, nargs;
2647 tree lhs, rtype, ratype;
2648 vec<constructor_elt, va_gc> *ret_ctor_elts;
2649
2650 /* Is STMT a vectorizable call? */
2651 if (!is_gimple_call (stmt))
2652 return false;
2653
2654 fndecl = gimple_call_fndecl (stmt);
2655 if (fndecl == NULL_TREE)
2656 return false;
2657
d52f5295 2658 struct cgraph_node *node = cgraph_node::get (fndecl);
2659 if (node == NULL || node->simd_clones == NULL)
2660 return false;
2661
2662 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2663 return false;
2664
2665 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2666 return false;
2667
2668 if (gimple_call_lhs (stmt)
2669 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2670 return false;
2671
2672 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2673
2674 vectype = STMT_VINFO_VECTYPE (stmt_info);
2675
2676 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2677 return false;
2678
2679 /* FORNOW */
2680 if (slp_node || PURE_SLP_STMT (stmt_info))
2681 return false;
2682
2683 /* Process function arguments. */
2684 nargs = gimple_call_num_args (stmt);
2685
2686 /* Bail out if the function has zero arguments. */
2687 if (nargs == 0)
2688 return false;
2689
2690 arginfo.create (nargs);
2691
2692 for (i = 0; i < nargs; i++)
2693 {
2694 simd_call_arg_info thisarginfo;
2695 affine_iv iv;
2696
2697 thisarginfo.linear_step = 0;
2698 thisarginfo.align = 0;
2699 thisarginfo.op = NULL_TREE;
2700
2701 op = gimple_call_arg (stmt, i);
2702 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2703 &def_stmt, &def, &thisarginfo.dt,
2704 &thisarginfo.vectype)
2705 || thisarginfo.dt == vect_uninitialized_def)
2706 {
2707 if (dump_enabled_p ())
2708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2709 "use not simple.\n");
2710 arginfo.release ();
2711 return false;
2712 }
2713
2714 if (thisarginfo.dt == vect_constant_def
2715 || thisarginfo.dt == vect_external_def)
2716 gcc_assert (thisarginfo.vectype == NULL_TREE);
2717 else
2718 gcc_assert (thisarginfo.vectype != NULL_TREE);
2719
2720 /* For linear arguments, the analyze phase should have saved
2721 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2722 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2723 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2724 {
2725 gcc_assert (vec_stmt);
2726 thisarginfo.linear_step
2727 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2728 thisarginfo.op
2729 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2730 /* If loop has been peeled for alignment, we need to adjust it. */
2731 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2732 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2733 if (n1 != n2)
2734 {
2735 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2736 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2737 tree opt = TREE_TYPE (thisarginfo.op);
2738 bias = fold_convert (TREE_TYPE (step), bias);
2739 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2740 thisarginfo.op
2741 = fold_build2 (POINTER_TYPE_P (opt)
2742 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2743 thisarginfo.op, bias);
2744 }
2745 }
2746 else if (!vec_stmt
2747 && thisarginfo.dt != vect_constant_def
2748 && thisarginfo.dt != vect_external_def
2749 && loop_vinfo
2750 && TREE_CODE (op) == SSA_NAME
2751 && simple_iv (loop, loop_containing_stmt (stmt), op,
2752 &iv, false)
2753 && tree_fits_shwi_p (iv.step))
2754 {
2755 thisarginfo.linear_step = tree_to_shwi (iv.step);
2756 thisarginfo.op = iv.base;
2757 }
2758 else if ((thisarginfo.dt == vect_constant_def
2759 || thisarginfo.dt == vect_external_def)
2760 && POINTER_TYPE_P (TREE_TYPE (op)))
2761 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2762
2763 arginfo.quick_push (thisarginfo);
2764 }
2765
2766 unsigned int badness = 0;
2767 struct cgraph_node *bestn = NULL;
2768 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2769 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2770 else
2771 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2772 n = n->simdclone->next_clone)
2773 {
2774 unsigned int this_badness = 0;
2775 if (n->simdclone->simdlen
2776 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2777 || n->simdclone->nargs != nargs)
2778 continue;
2779 if (n->simdclone->simdlen
2780 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2781 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2782 - exact_log2 (n->simdclone->simdlen)) * 1024;
2783 if (n->simdclone->inbranch)
2784 this_badness += 2048;
2785 int target_badness = targetm.simd_clone.usable (n);
2786 if (target_badness < 0)
2787 continue;
2788 this_badness += target_badness * 512;
2789 /* FORNOW: Have to add code to add the mask argument. */
2790 if (n->simdclone->inbranch)
2791 continue;
2792 for (i = 0; i < nargs; i++)
2793 {
2794 switch (n->simdclone->args[i].arg_type)
2795 {
2796 case SIMD_CLONE_ARG_TYPE_VECTOR:
2797 if (!useless_type_conversion_p
2798 (n->simdclone->args[i].orig_type,
2799 TREE_TYPE (gimple_call_arg (stmt, i))))
2800 i = -1;
2801 else if (arginfo[i].dt == vect_constant_def
2802 || arginfo[i].dt == vect_external_def
2803 || arginfo[i].linear_step)
2804 this_badness += 64;
2805 break;
2806 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2807 if (arginfo[i].dt != vect_constant_def
2808 && arginfo[i].dt != vect_external_def)
2809 i = -1;
2810 break;
2811 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2812 if (arginfo[i].dt == vect_constant_def
2813 || arginfo[i].dt == vect_external_def
2814 || (arginfo[i].linear_step
2815 != n->simdclone->args[i].linear_step))
2816 i = -1;
2817 break;
2818 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2819 /* FORNOW */
2820 i = -1;
2821 break;
2822 case SIMD_CLONE_ARG_TYPE_MASK:
2823 gcc_unreachable ();
2824 }
2825 if (i == (size_t) -1)
2826 break;
2827 if (n->simdclone->args[i].alignment > arginfo[i].align)
2828 {
2829 i = -1;
2830 break;
2831 }
2832 if (arginfo[i].align)
2833 this_badness += (exact_log2 (arginfo[i].align)
2834 - exact_log2 (n->simdclone->args[i].alignment));
2835 }
2836 if (i == (size_t) -1)
2837 continue;
2838 if (bestn == NULL || this_badness < badness)
2839 {
2840 bestn = n;
2841 badness = this_badness;
2842 }
2843 }
2844
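/* A hypothetical scoring example for the selection loop above: with a
   vectorization factor of 8, a clone of simdlen 4 collects
   (log2 (8) - log2 (4)) * 1024 == 1024 badness, a clone the target hook
   deems unusable is skipped outright, and each vector argument whose
   actual use is uniform or linear adds 64; the clone with the lowest
   total becomes BESTN.  (Inbranch clones are charged 2048 but FORNOW
   skipped anyway.)  */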
2845 if (bestn == NULL)
2846 {
2847 arginfo.release ();
2848 return false;
2849 }
2850
2851 for (i = 0; i < nargs; i++)
2852 if ((arginfo[i].dt == vect_constant_def
2853 || arginfo[i].dt == vect_external_def)
2854 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2855 {
2856 arginfo[i].vectype
2857 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2858 i)));
2859 if (arginfo[i].vectype == NULL
2860 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2861 > bestn->simdclone->simdlen))
2862 {
2863 arginfo.release ();
2864 return false;
2865 }
2866 }
2867
2868 fndecl = bestn->decl;
2869 nunits = bestn->simdclone->simdlen;
2870 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2871
2872 /* If the function isn't const, only allow it in simd loops where user
2873 has asserted that at least nunits consecutive iterations can be
2874 performed using SIMD instructions. */
2875 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2876 && gimple_vuse (stmt))
2877 {
2878 arginfo.release ();
2879 return false;
2880 }
2881
2882 /* Sanity check: make sure that at least one copy of the vectorized stmt
2883 needs to be generated. */
2884 gcc_assert (ncopies >= 1);
2885
2886 if (!vec_stmt) /* transformation not required. */
2887 {
2888 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2889 for (i = 0; i < nargs; i++)
2890 if (bestn->simdclone->args[i].arg_type
2891 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2892 {
2893 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2894 + 1);
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2896 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2897 ? size_type_node : TREE_TYPE (arginfo[i].op);
2898 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2899 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2900 }
2901 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2902 if (dump_enabled_p ())
2903 dump_printf_loc (MSG_NOTE, vect_location,
2904 "=== vectorizable_simd_clone_call ===\n");
2905/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2906 arginfo.release ();
2907 return true;
2908 }
2909
2910 /** Transform. **/
2911
2912 if (dump_enabled_p ())
2913 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2914
2915 /* Handle def. */
2916 scalar_dest = gimple_call_lhs (stmt);
2917 vec_dest = NULL_TREE;
2918 rtype = NULL_TREE;
2919 ratype = NULL_TREE;
2920 if (scalar_dest)
2921 {
2922 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2923 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2924 if (TREE_CODE (rtype) == ARRAY_TYPE)
2925 {
2926 ratype = rtype;
2927 rtype = TREE_TYPE (ratype);
2928 }
2929 }
2930
2931 prev_stmt_info = NULL;
2932 for (j = 0; j < ncopies; ++j)
2933 {
2934 /* Build argument list for the vectorized call. */
2935 if (j == 0)
2936 vargs.create (nargs);
2937 else
2938 vargs.truncate (0);
2939
2940 for (i = 0; i < nargs; i++)
2941 {
2942 unsigned int k, l, m, o;
2943 tree atype;
2944 op = gimple_call_arg (stmt, i);
2945 switch (bestn->simdclone->args[i].arg_type)
2946 {
2947 case SIMD_CLONE_ARG_TYPE_VECTOR:
2948 atype = bestn->simdclone->args[i].vector_type;
2949 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2950 for (m = j * o; m < (j + 1) * o; m++)
2951 {
2952 if (TYPE_VECTOR_SUBPARTS (atype)
2953 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2954 {
2955 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2956 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2957 / TYPE_VECTOR_SUBPARTS (atype));
2958 gcc_assert ((k & (k - 1)) == 0);
2959 if (m == 0)
2960 vec_oprnd0
2961 = vect_get_vec_def_for_operand (op, stmt, NULL);
2962 else
2963 {
2964 vec_oprnd0 = arginfo[i].op;
2965 if ((m & (k - 1)) == 0)
2966 vec_oprnd0
2967 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2968 vec_oprnd0);
2969 }
2970 arginfo[i].op = vec_oprnd0;
2971 vec_oprnd0
2972 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2973 size_int (prec),
2974 bitsize_int ((m & (k - 1)) * prec));
2975 new_stmt
b731b390 2976 = gimple_build_assign (make_ssa_name (atype),
2977 vec_oprnd0);
2978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2979 vargs.safe_push (gimple_assign_lhs (new_stmt));
2980 }
2981 else
2982 {
2983 k = (TYPE_VECTOR_SUBPARTS (atype)
2984 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2985 gcc_assert ((k & (k - 1)) == 0);
2986 vec<constructor_elt, va_gc> *ctor_elts;
2987 if (k != 1)
2988 vec_alloc (ctor_elts, k);
2989 else
2990 ctor_elts = NULL;
2991 for (l = 0; l < k; l++)
2992 {
2993 if (m == 0 && l == 0)
2994 vec_oprnd0
2995 = vect_get_vec_def_for_operand (op, stmt, NULL);
2996 else
2997 vec_oprnd0
2998 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2999 arginfo[i].op);
3000 arginfo[i].op = vec_oprnd0;
3001 if (k == 1)
3002 break;
3003 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3004 vec_oprnd0);
3005 }
3006 if (k == 1)
3007 vargs.safe_push (vec_oprnd0);
3008 else
3009 {
3010 vec_oprnd0 = build_constructor (atype, ctor_elts);
3011 new_stmt
b731b390 3012 = gimple_build_assign (make_ssa_name (atype),
3013 vec_oprnd0);
3014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3015 vargs.safe_push (gimple_assign_lhs (new_stmt));
3016 }
3017 }
3018 }
3019 break;
3020 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3021 vargs.safe_push (op);
3022 break;
3023 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3024 if (j == 0)
3025 {
3026 gimple_seq stmts;
3027 arginfo[i].op
3028 = force_gimple_operand (arginfo[i].op, &stmts, true,
3029 NULL_TREE);
3030 if (stmts != NULL)
3031 {
3032 basic_block new_bb;
3033 edge pe = loop_preheader_edge (loop);
3034 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3035 gcc_assert (!new_bb);
3036 }
b731b390 3037 tree phi_res = copy_ssa_name (op);
538dd0b7 3038 gphi *new_phi = create_phi_node (phi_res, loop->header);
3039 set_vinfo_for_stmt (new_phi,
3040 new_stmt_vec_info (new_phi, loop_vinfo,
3041 NULL));
3042 add_phi_arg (new_phi, arginfo[i].op,
3043 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3044 enum tree_code code
3045 = POINTER_TYPE_P (TREE_TYPE (op))
3046 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3047 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3048 ? sizetype : TREE_TYPE (op);
3049 widest_int cst
3050 = wi::mul (bestn->simdclone->args[i].linear_step,
3051 ncopies * nunits);
3052 tree tcst = wide_int_to_tree (type, cst);
b731b390 3053 tree phi_arg = copy_ssa_name (op);
3054 new_stmt
3055 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3056 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3057 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3058 set_vinfo_for_stmt (new_stmt,
3059 new_stmt_vec_info (new_stmt, loop_vinfo,
3060 NULL));
3061 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3062 UNKNOWN_LOCATION);
3063 arginfo[i].op = phi_res;
3064 vargs.safe_push (phi_res);
3065 }
3066 else
3067 {
3068 enum tree_code code
3069 = POINTER_TYPE_P (TREE_TYPE (op))
3070 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3071 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3072 ? sizetype : TREE_TYPE (op);
3073 widest_int cst
3074 = wi::mul (bestn->simdclone->args[i].linear_step,
3075 j * nunits);
3076 tree tcst = wide_int_to_tree (type, cst);
b731b390 3077 new_temp = make_ssa_name (TREE_TYPE (op));
3078 new_stmt = gimple_build_assign (new_temp, code,
3079 arginfo[i].op, tcst);
3080 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3081 vargs.safe_push (new_temp);
3082 }
3083 break;
3084 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3085 default:
3086 gcc_unreachable ();
3087 }
3088 }
3089
3090 new_stmt = gimple_build_call_vec (fndecl, vargs);
3091 if (vec_dest)
3092 {
3093 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3094 if (ratype)
b731b390 3095 new_temp = create_tmp_var (ratype);
3096 else if (TYPE_VECTOR_SUBPARTS (vectype)
3097 == TYPE_VECTOR_SUBPARTS (rtype))
3098 new_temp = make_ssa_name (vec_dest, new_stmt);
3099 else
3100 new_temp = make_ssa_name (rtype, new_stmt);
3101 gimple_call_set_lhs (new_stmt, new_temp);
3102 }
3103 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3104
3105 if (vec_dest)
3106 {
3107 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3108 {
3109 unsigned int k, l;
3110 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3111 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3112 gcc_assert ((k & (k - 1)) == 0);
3113 for (l = 0; l < k; l++)
3114 {
3115 tree t;
3116 if (ratype)
3117 {
3118 t = build_fold_addr_expr (new_temp);
3119 t = build2 (MEM_REF, vectype, t,
3120 build_int_cst (TREE_TYPE (t),
3121 l * prec / BITS_PER_UNIT));
3122 }
3123 else
3124 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3125 size_int (prec), bitsize_int (l * prec));
3126 new_stmt
b731b390 3127 = gimple_build_assign (make_ssa_name (vectype), t);
3128 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3129 if (j == 0 && l == 0)
3130 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3131 else
3132 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3133
3134 prev_stmt_info = vinfo_for_stmt (new_stmt);
3135 }
3136
3137 if (ratype)
3138 {
3139 tree clobber = build_constructor (ratype, NULL);
3140 TREE_THIS_VOLATILE (clobber) = 1;
3141 new_stmt = gimple_build_assign (new_temp, clobber);
3142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3143 }
3144 continue;
3145 }
3146 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3147 {
3148 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3149 / TYPE_VECTOR_SUBPARTS (rtype));
3150 gcc_assert ((k & (k - 1)) == 0);
3151 if ((j & (k - 1)) == 0)
3152 vec_alloc (ret_ctor_elts, k);
3153 if (ratype)
3154 {
3155 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3156 for (m = 0; m < o; m++)
3157 {
3158 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3159 size_int (m), NULL_TREE, NULL_TREE);
3160 new_stmt
b731b390 3161 = gimple_build_assign (make_ssa_name (rtype), tem);
3162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3163 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3164 gimple_assign_lhs (new_stmt));
3165 }
3166 tree clobber = build_constructor (ratype, NULL);
3167 TREE_THIS_VOLATILE (clobber) = 1;
3168 new_stmt = gimple_build_assign (new_temp, clobber);
3169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3170 }
3171 else
3172 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3173 if ((j & (k - 1)) != k - 1)
3174 continue;
3175 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3176 new_stmt
b731b390 3177 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3179
3180 if ((unsigned) j == k - 1)
3181 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3182 else
3183 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3184
3185 prev_stmt_info = vinfo_for_stmt (new_stmt);
3186 continue;
3187 }
3188 else if (ratype)
3189 {
3190 tree t = build_fold_addr_expr (new_temp);
3191 t = build2 (MEM_REF, vectype, t,
3192 build_int_cst (TREE_TYPE (t), 0));
3193 new_stmt
b731b390 3194 = gimple_build_assign (make_ssa_name (vec_dest), t);
3195 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3196 tree clobber = build_constructor (ratype, NULL);
3197 TREE_THIS_VOLATILE (clobber) = 1;
3198 vect_finish_stmt_generation (stmt,
3199 gimple_build_assign (new_temp,
3200 clobber), gsi);
3201 }
3202 }
3203
3204 if (j == 0)
3205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3206 else
3207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3208
3209 prev_stmt_info = vinfo_for_stmt (new_stmt);
3210 }
3211
3212 vargs.release ();
3213
3214 /* The call in STMT might prevent it from being removed in dce.
3215 We however cannot remove it here, due to the way the ssa name
3216 it defines is mapped to the new definition. So just replace
3217 rhs of the statement with something harmless. */
3218
3219 if (slp_node)
3220 return true;
3221
3222 if (scalar_dest)
3223 {
3224 type = TREE_TYPE (scalar_dest);
3225 if (is_pattern_stmt_p (stmt_info))
3226 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3227 else
3228 lhs = gimple_call_lhs (stmt);
3229 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3230 }
3231 else
3232 new_stmt = gimple_build_nop ();
3233 set_vinfo_for_stmt (new_stmt, stmt_info);
3234 set_vinfo_for_stmt (stmt, NULL);
3235 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3236 gsi_replace (gsi, new_stmt, true);
3237 unlink_stmt_vdef (stmt);
3238
3239 return true;
3240}
3241
3242
3243/* Function vect_gen_widened_results_half
3244
3245 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 3246 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3247 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3248 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3249 needs to be created (DECL is a function-decl of a target-builtin).
3250 STMT is the original scalar stmt that we are vectorizing. */
3251
3252static gimple
3253vect_gen_widened_results_half (enum tree_code code,
3254 tree decl,
3255 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3256 tree vec_dest, gimple_stmt_iterator *gsi,
3257 gimple stmt)
b8698a0f 3258{
ebfd146a 3259 gimple new_stmt;
3260 tree new_temp;
3261
3262 /* Generate half of the widened result: */
3263 if (code == CALL_EXPR)
3264 {
3265 /* Target specific support */
3266 if (op_type == binary_op)
3267 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3268 else
3269 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3270 new_temp = make_ssa_name (vec_dest, new_stmt);
3271 gimple_call_set_lhs (new_stmt, new_temp);
3272 }
3273 else
ebfd146a 3274 {
3275 /* Generic support */
3276 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3277 if (op_type != binary_op)
3278 vec_oprnd1 = NULL;
0d0e4a03 3279 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3280 new_temp = make_ssa_name (vec_dest, new_stmt);
3281 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3282 }
3283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3284
3285 return new_stmt;
3286}
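
/* A usage sketch (purely illustrative): widening a V8HI operand VX to two
   V4SI halves would call the helper above twice,

     vect_gen_widened_results_half (VEC_UNPACK_LO_EXPR, NULL_TREE, vx, NULL,
                                    unary_op, vec_dest, gsi, stmt);
     vect_gen_widened_results_half (VEC_UNPACK_HI_EXPR, NULL_TREE, vx, NULL,
                                    unary_op, vec_dest, gsi, stmt);

   each call emitting one assignment (or one call to DECL when CODE is
   CALL_EXPR) through vect_finish_stmt_generation.  */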
3287
3288
3289/* Get vectorized definitions for loop-based vectorization. For the first
3290 operand we call vect_get_vec_def_for_operand() (with OPRND containing
 3291 the scalar operand), and for the rest we get a copy with
3292 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3293 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3294 The vectors are collected into VEC_OPRNDS. */
3295
3296static void
3297vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
9771b263 3298 vec<tree> *vec_oprnds, int multi_step_cvt)
3299{
3300 tree vec_oprnd;
3301
3302 /* Get first vector operand. */
 3303 /* All the vector operands except the very first one (which is the scalar
 3304 operand) are stmt copies. */
3305 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3306 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3307 else
3308 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3309
9771b263 3310 vec_oprnds->quick_push (vec_oprnd);
3311
3312 /* Get second vector operand. */
3313 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3314 vec_oprnds->quick_push (vec_oprnd);
3315
3316 *oprnd = vec_oprnd;
3317
3318 /* For conversion in multiple steps, continue to get operands
3319 recursively. */
3320 if (multi_step_cvt)
3321 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3322}
3323
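/* Worked example (added, hedged): called with MULTI_STEP_CVT == 1 this
   collects 2 * 2^1 = 4 defs, the first from the scalar operand and the
   rest as successive stmt copies:

     d0 = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
     d1 = vect_get_vec_def_for_stmt_copy (dt, d0);
     d2 = vect_get_vec_def_for_stmt_copy (dt, d1);
     d3 = vect_get_vec_def_for_stmt_copy (dt, d2);

   so VEC_OPRNDS ends up as { d0, d1, d2, d3 }.  */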
3324
3325/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3326 For multi-step conversions store the resulting vectors and call the function
3327 recursively. */
3328
3329static void
9771b263 3330vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4a00c761 3331 int multi_step_cvt, gimple stmt,
9771b263 3332 vec<tree> vec_dsts,
3333 gimple_stmt_iterator *gsi,
3334 slp_tree slp_node, enum tree_code code,
3335 stmt_vec_info *prev_stmt_info)
3336{
3337 unsigned int i;
3338 tree vop0, vop1, new_tmp, vec_dest;
3339 gimple new_stmt;
3340 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3341
9771b263 3342 vec_dest = vec_dsts.pop ();
4a00c761 3343
9771b263 3344 for (i = 0; i < vec_oprnds->length (); i += 2)
3345 {
3346 /* Create demotion operation. */
3347 vop0 = (*vec_oprnds)[i];
3348 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3349 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3350 new_tmp = make_ssa_name (vec_dest, new_stmt);
3351 gimple_assign_set_lhs (new_stmt, new_tmp);
3352 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3353
3354 if (multi_step_cvt)
3355 /* Store the resulting vector for next recursive call. */
9771b263 3356 (*vec_oprnds)[i/2] = new_tmp;
3357 else
3358 {
3359 /* This is the last step of the conversion sequence. Store the
3360 vectors in SLP_NODE or in vector info of the scalar statement
3361 (or in STMT_VINFO_RELATED_STMT chain). */
3362 if (slp_node)
9771b263 3363 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3364 else
3365 {
3366 if (!*prev_stmt_info)
3367 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3368 else
3369 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3370
3371 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3372 }
3373 }
3374 }
3375
3376 /* For multi-step demotion operations we first generate demotion operations
3377 from the source type to the intermediate types, and then combine the
 3378 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3379 type. */
3380 if (multi_step_cvt)
3381 {
3382 /* At each level of recursion we have half of the operands we had at the
3383 previous level. */
9771b263 3384 vec_oprnds->truncate ((i+1)/2);
3385 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3386 stmt, vec_dsts, gsi, slp_node,
3387 VEC_PACK_TRUNC_EXPR,
3388 prev_stmt_info);
3389 }
3390
9771b263 3391 vec_dsts.quick_push (vec_dest);
3392}
3393
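/* Illustration (added, hedged sketch): demoting int -> char in two steps
   with 128-bit vectors.  Each pack combines two inputs into one narrower
   vector, which halves the operand count per level:

     level 1:  h0 = VEC_PACK_TRUNC_EXPR <i0, i1>   (two V4SI -> one V8HI)
               h1 = VEC_PACK_TRUNC_EXPR <i2, i3>
     level 0:  c0 = VEC_PACK_TRUNC_EXPR <h0, h1>   (two V8HI -> one V16QI)

   hence the recursion below truncates VEC_OPRNDS to (i+1)/2 entries.  */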
3394
3395/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3396 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3397 the resulting vectors and call the function recursively. */
3398
3399static void
3400vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3401 vec<tree> *vec_oprnds1,
3402 gimple stmt, tree vec_dest,
3403 gimple_stmt_iterator *gsi,
3404 enum tree_code code1,
3405 enum tree_code code2, tree decl1,
3406 tree decl2, int op_type)
3407{
3408 int i;
3409 tree vop0, vop1, new_tmp1, new_tmp2;
3410 gimple new_stmt1, new_stmt2;
6e1aa848 3411 vec<tree> vec_tmp = vNULL;
4a00c761 3412
3413 vec_tmp.create (vec_oprnds0->length () * 2);
3414 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3415 {
3416 if (op_type == binary_op)
9771b263 3417 vop1 = (*vec_oprnds1)[i];
3418 else
3419 vop1 = NULL_TREE;
3420
3421 /* Generate the two halves of promotion operation. */
3422 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3423 op_type, vec_dest, gsi, stmt);
3424 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3425 op_type, vec_dest, gsi, stmt);
3426 if (is_gimple_call (new_stmt1))
3427 {
3428 new_tmp1 = gimple_call_lhs (new_stmt1);
3429 new_tmp2 = gimple_call_lhs (new_stmt2);
3430 }
3431 else
3432 {
3433 new_tmp1 = gimple_assign_lhs (new_stmt1);
3434 new_tmp2 = gimple_assign_lhs (new_stmt2);
3435 }
3436
3437 /* Store the results for the next step. */
3438 vec_tmp.quick_push (new_tmp1);
3439 vec_tmp.quick_push (new_tmp2);
3440 }
3441
689eaba3 3442 vec_oprnds0->release ();
3443 *vec_oprnds0 = vec_tmp;
3444}
3445
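/* Illustration (added, hedged): each promoted operand vector yields two
   result vectors, so the operand list doubles in length per step, e.g.
   for V8HI -> V4SI:

     in:   { v0, v1 }
     out:  { lo (v0), hi (v0), lo (v1), hi (v1) }

   which is why vec_tmp below is created with twice the length of
   *VEC_OPRNDS0.  */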
3446
3447/* Check if STMT performs a conversion operation that can be vectorized.
3448 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 3449 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3450 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3451
3452static bool
3453vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3454 gimple *vec_stmt, slp_tree slp_node)
3455{
3456 tree vec_dest;
3457 tree scalar_dest;
4a00c761 3458 tree op0, op1 = NULL_TREE;
3459 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3460 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3461 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3462 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 3463 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3464 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3465 tree new_temp;
3466 tree def;
3467 gimple def_stmt;
3468 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3469 gimple new_stmt = NULL;
3470 stmt_vec_info prev_stmt_info;
3471 int nunits_in;
3472 int nunits_out;
3473 tree vectype_out, vectype_in;
3474 int ncopies, i, j;
3475 tree lhs_type, rhs_type;
ebfd146a 3476 enum { NARROW, NONE, WIDEN } modifier;
3477 vec<tree> vec_oprnds0 = vNULL;
3478 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 3479 tree vop0;
3480 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3481 int multi_step_cvt = 0;
3482 vec<tree> vec_dsts = vNULL;
3483 vec<tree> interm_types = vNULL;
3484 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3485 int op_type;
ef4bddc2 3486 machine_mode rhs_mode;
4a00c761 3487 unsigned short fltsz;
3488
3489 /* Is STMT a vectorizable conversion? */
3490
4a00c761 3491 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3492 return false;
3493
8644a673 3494 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3495 return false;
3496
3497 if (!is_gimple_assign (stmt))
3498 return false;
3499
3500 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3501 return false;
3502
3503 code = gimple_assign_rhs_code (stmt);
3504 if (!CONVERT_EXPR_CODE_P (code)
3505 && code != FIX_TRUNC_EXPR
3506 && code != FLOAT_EXPR
3507 && code != WIDEN_MULT_EXPR
3508 && code != WIDEN_LSHIFT_EXPR)
3509 return false;
3510
3511 op_type = TREE_CODE_LENGTH (code);
3512
ebfd146a 3513 /* Check types of lhs and rhs. */
b690cc0f 3514 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 3515 lhs_type = TREE_TYPE (scalar_dest);
3516 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3517
3518 op0 = gimple_assign_rhs1 (stmt);
3519 rhs_type = TREE_TYPE (op0);
3520
3521 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3522 && !((INTEGRAL_TYPE_P (lhs_type)
3523 && INTEGRAL_TYPE_P (rhs_type))
3524 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3525 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3526 return false;
3527
3528 if ((INTEGRAL_TYPE_P (lhs_type)
3529 && (TYPE_PRECISION (lhs_type)
3530 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3531 || (INTEGRAL_TYPE_P (rhs_type)
3532 && (TYPE_PRECISION (rhs_type)
3533 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3534 {
73fbfcad 3535 if (dump_enabled_p ())
78c60e3d 3536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3537 "type conversion to/from bit-precision unsupported."
3538 "\n");
3539 return false;
3540 }
3541
b690cc0f 3542 /* Check the operands of the operation. */
24ee1384 3543 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3544 &def_stmt, &def, &dt[0], &vectype_in))
3545 {
73fbfcad 3546 if (dump_enabled_p ())
78c60e3d 3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3548 "use not simple.\n");
3549 return false;
3550 }
3551 if (op_type == binary_op)
3552 {
3553 bool ok;
3554
3555 op1 = gimple_assign_rhs2 (stmt);
3556 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3557 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3558 OP1. */
3559 if (CONSTANT_CLASS_P (op0))
f5709183 3560 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3561 &def_stmt, &def, &dt[1], &vectype_in);
3562 else
f5709183 3563 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 3564 &def, &dt[1]);
3565
3566 if (!ok)
3567 {
73fbfcad 3568 if (dump_enabled_p ())
78c60e3d 3569 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3570 "use not simple.\n");
3571 return false;
3572 }
3573 }
3574
 3575 /* If op0 is an external or constant def, use a vector type of
3576 the same size as the output vector type. */
ebfd146a 3577 if (!vectype_in)
b690cc0f 3578 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3579 if (vec_stmt)
3580 gcc_assert (vectype_in);
3581 if (!vectype_in)
3582 {
73fbfcad 3583 if (dump_enabled_p ())
4a00c761 3584 {
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3586 "no vectype for scalar type ");
3587 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3588 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 3589 }
3590
3591 return false;
3592 }
ebfd146a 3593
3594 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3595 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 3596 if (nunits_in < nunits_out)
3597 modifier = NARROW;
3598 else if (nunits_out == nunits_in)
3599 modifier = NONE;
ebfd146a 3600 else
4a00c761 3601 modifier = WIDEN;
ebfd146a 3602
3603 /* Multiple types in SLP are handled by creating the appropriate number of
3604 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3605 case of SLP. */
437f4a00 3606 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 3607 ncopies = 1;
3608 else if (modifier == NARROW)
3609 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3610 else
3611 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 3612
3613 /* Sanity check: make sure that at least one copy of the vectorized stmt
3614 needs to be generated. */
3615 gcc_assert (ncopies >= 1);
3616
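/* Worked example of the classification above (added, hedged), assuming
   128-bit vectors and VF == 8:

     short -> int : nunits_in = 8 (V8HI) > nunits_out = 4 (V4SI)
                    => modifier = WIDEN,  ncopies = VF / nunits_in  = 1
     int -> short : nunits_in = 4 (V4SI) < nunits_out = 8 (V8HI)
                    => modifier = NARROW, ncopies = VF / nunits_out = 1

   With VF == 16 the same conversions would need ncopies == 2.  */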
ebfd146a 3617 /* Supportable by target? */
4a00c761 3618 switch (modifier)
ebfd146a 3619 {
3620 case NONE:
3621 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3622 return false;
3623 if (supportable_convert_operation (code, vectype_out, vectype_in,
3624 &decl1, &code1))
3625 break;
3626 /* FALLTHRU */
3627 unsupported:
73fbfcad 3628 if (dump_enabled_p ())
78c60e3d 3629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3630 "conversion not supported by target.\n");
ebfd146a 3631 return false;
ebfd146a 3632
3633 case WIDEN:
3634 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3635 &code1, &code2, &multi_step_cvt,
3636 &interm_types))
3637 {
 3638 /* A binary widening operation can only be supported directly by the
3639 architecture. */
3640 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3641 break;
3642 }
3643
3644 if (code != FLOAT_EXPR
3645 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3646 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3647 goto unsupported;
3648
3649 rhs_mode = TYPE_MODE (rhs_type);
3650 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3651 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3652 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3653 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3654 {
3655 cvt_type
3656 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3657 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3658 if (cvt_type == NULL_TREE)
3659 goto unsupported;
3660
3661 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3662 {
3663 if (!supportable_convert_operation (code, vectype_out,
3664 cvt_type, &decl1, &codecvt1))
3665 goto unsupported;
3666 }
3667 else if (!supportable_widening_operation (code, stmt, vectype_out,
3668 cvt_type, &codecvt1,
3669 &codecvt2, &multi_step_cvt,
3670 &interm_types))
3671 continue;
3672 else
3673 gcc_assert (multi_step_cvt == 0);
3674
3675 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3676 vectype_in, &code1, &code2,
3677 &multi_step_cvt, &interm_types))
3678 break;
3679 }
3680
3681 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3682 goto unsupported;
3683
3684 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3685 codecvt2 = ERROR_MARK;
3686 else
3687 {
3688 multi_step_cvt++;
9771b263 3689 interm_types.safe_push (cvt_type);
3690 cvt_type = NULL_TREE;
3691 }
3692 break;
3693
3694 case NARROW:
3695 gcc_assert (op_type == unary_op);
3696 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3697 &code1, &multi_step_cvt,
3698 &interm_types))
3699 break;
3700
3701 if (code != FIX_TRUNC_EXPR
3702 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3703 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3704 goto unsupported;
3705
3706 rhs_mode = TYPE_MODE (rhs_type);
3707 cvt_type
3708 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3709 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3710 if (cvt_type == NULL_TREE)
3711 goto unsupported;
3712 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3713 &decl1, &codecvt1))
3714 goto unsupported;
3715 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3716 &code1, &multi_step_cvt,
3717 &interm_types))
3718 break;
3719 goto unsupported;
3720
3721 default:
3722 gcc_unreachable ();
3723 }
3724
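/* Example of the WIDEN fallback above (added, hedged): vectorizing
   float_var = (float) short_var on a target without a direct
   short -> float conversion.  The mode loop settles on an intermediate
   integer type of the same size as float:

     step 1:  V8HI --(widening NOP, code1/code2)--> two V4SI
     step 2:  V4SI --(FLOAT_EXPR, codecvt1)-------> V4SF

   i.e. first widen the integers, then convert each piece to float.  */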
3725 if (!vec_stmt) /* transformation not required. */
3726 {
73fbfcad 3727 if (dump_enabled_p ())
78c60e3d 3728 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3729 "=== vectorizable_conversion ===\n");
4a00c761 3730 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3731 {
3732 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
c3e7ee41 3733 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
8bd37302 3734 }
3735 else if (modifier == NARROW)
3736 {
3737 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 3738 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3739 }
3740 else
3741 {
3742 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 3743 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 3744 }
9771b263 3745 interm_types.release ();
3746 return true;
3747 }
3748
3749 /** Transform. **/
73fbfcad 3750 if (dump_enabled_p ())
78c60e3d 3751 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3752 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 3753
3754 if (op_type == binary_op)
3755 {
3756 if (CONSTANT_CLASS_P (op0))
3757 op0 = fold_convert (TREE_TYPE (op1), op0);
3758 else if (CONSTANT_CLASS_P (op1))
3759 op1 = fold_convert (TREE_TYPE (op0), op1);
3760 }
3761
3762 /* In case of multi-step conversion, we first generate conversion operations
 3763 to the intermediate types, and then from those types to the final one.
3764 We create vector destinations for the intermediate type (TYPES) received
3765 from supportable_*_operation, and store them in the correct order
3766 for future use in vect_create_vectorized_*_stmts (). */
9771b263 3767 vec_dsts.create (multi_step_cvt + 1);
3768 vec_dest = vect_create_destination_var (scalar_dest,
3769 (cvt_type && modifier == WIDEN)
3770 ? cvt_type : vectype_out);
9771b263 3771 vec_dsts.quick_push (vec_dest);
3772
3773 if (multi_step_cvt)
3774 {
3775 for (i = interm_types.length () - 1;
3776 interm_types.iterate (i, &intermediate_type); i--)
3777 {
3778 vec_dest = vect_create_destination_var (scalar_dest,
3779 intermediate_type);
9771b263 3780 vec_dsts.quick_push (vec_dest);
3781 }
3782 }
ebfd146a 3783
4a00c761 3784 if (cvt_type)
3785 vec_dest = vect_create_destination_var (scalar_dest,
3786 modifier == WIDEN
3787 ? vectype_out : cvt_type);
3788
3789 if (!slp_node)
3790 {
30862efc 3791 if (modifier == WIDEN)
4a00c761 3792 {
c3284718 3793 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 3794 if (op_type == binary_op)
9771b263 3795 vec_oprnds1.create (1);
4a00c761 3796 }
30862efc 3797 else if (modifier == NARROW)
3798 vec_oprnds0.create (
3799 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3800 }
3801 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 3802 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 3803
4a00c761 3804 last_oprnd = op0;
3805 prev_stmt_info = NULL;
3806 switch (modifier)
3807 {
3808 case NONE:
3809 for (j = 0; j < ncopies; j++)
3810 {
ebfd146a 3811 if (j == 0)
3812 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3813 -1);
3814 else
3815 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3816
9771b263 3817 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3818 {
3819 /* Arguments are ready, create the new vector stmt. */
3820 if (code1 == CALL_EXPR)
3821 {
3822 new_stmt = gimple_build_call (decl1, 1, vop0);
3823 new_temp = make_ssa_name (vec_dest, new_stmt);
3824 gimple_call_set_lhs (new_stmt, new_temp);
3825 }
3826 else
3827 {
3828 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 3829 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3830 new_temp = make_ssa_name (vec_dest, new_stmt);
3831 gimple_assign_set_lhs (new_stmt, new_temp);
3832 }
3833
3834 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3835 if (slp_node)
9771b263 3836 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3837 }
3838
3839 if (j == 0)
3840 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3841 else
3842 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3843 prev_stmt_info = vinfo_for_stmt (new_stmt);
3844 }
3845 break;
3846
3847 case WIDEN:
3848 /* In case the vectorization factor (VF) is bigger than the number
3849 of elements that we can fit in a vectype (nunits), we have to
 3850 generate more than one vector stmt, i.e., we need to "unroll"
3851 the vector stmt by a factor VF/nunits. */
3852 for (j = 0; j < ncopies; j++)
3853 {
4a00c761 3854 /* Handle uses. */
ebfd146a 3855 if (j == 0)
3856 {
3857 if (slp_node)
3858 {
3859 if (code == WIDEN_LSHIFT_EXPR)
3860 {
3861 unsigned int k;
ebfd146a 3862
3863 vec_oprnd1 = op1;
3864 /* Store vec_oprnd1 for every vector stmt to be created
3865 for SLP_NODE. We check during the analysis that all
3866 the shift arguments are the same. */
3867 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 3868 vec_oprnds1.quick_push (vec_oprnd1);
3869
3870 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3871 slp_node, -1);
3872 }
3873 else
3874 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3875 &vec_oprnds1, slp_node, -1);
3876 }
3877 else
3878 {
3879 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 3880 vec_oprnds0.quick_push (vec_oprnd0);
3881 if (op_type == binary_op)
3882 {
3883 if (code == WIDEN_LSHIFT_EXPR)
3884 vec_oprnd1 = op1;
3885 else
3886 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3887 NULL);
9771b263 3888 vec_oprnds1.quick_push (vec_oprnd1);
3889 }
3890 }
3891 }
ebfd146a 3892 else
3893 {
3894 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3895 vec_oprnds0.truncate (0);
3896 vec_oprnds0.quick_push (vec_oprnd0);
3897 if (op_type == binary_op)
3898 {
3899 if (code == WIDEN_LSHIFT_EXPR)
3900 vec_oprnd1 = op1;
3901 else
3902 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3903 vec_oprnd1);
3904 vec_oprnds1.truncate (0);
3905 vec_oprnds1.quick_push (vec_oprnd1);
3906 }
3907 }
ebfd146a 3908
3909 /* Arguments are ready. Create the new vector stmts. */
3910 for (i = multi_step_cvt; i >= 0; i--)
3911 {
9771b263 3912 tree this_dest = vec_dsts[i];
3913 enum tree_code c1 = code1, c2 = code2;
3914 if (i == 0 && codecvt2 != ERROR_MARK)
3915 {
3916 c1 = codecvt1;
3917 c2 = codecvt2;
3918 }
3919 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3920 &vec_oprnds1,
3921 stmt, this_dest, gsi,
3922 c1, c2, decl1, decl2,
3923 op_type);
3924 }
3925
9771b263 3926 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3927 {
3928 if (cvt_type)
3929 {
3930 if (codecvt1 == CALL_EXPR)
3931 {
3932 new_stmt = gimple_build_call (decl1, 1, vop0);
3933 new_temp = make_ssa_name (vec_dest, new_stmt);
3934 gimple_call_set_lhs (new_stmt, new_temp);
3935 }
3936 else
3937 {
3938 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 3939 new_temp = make_ssa_name (vec_dest);
3940 new_stmt = gimple_build_assign (new_temp, codecvt1,
3941 vop0);
3942 }
3943
3944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3945 }
3946 else
3947 new_stmt = SSA_NAME_DEF_STMT (vop0);
3948
3949 if (slp_node)
9771b263 3950 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3951 else
3952 {
3953 if (!prev_stmt_info)
3954 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3955 else
3956 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3957 prev_stmt_info = vinfo_for_stmt (new_stmt);
3958 }
3959 }
ebfd146a 3960 }
3961
3962 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3963 break;
3964
3965 case NARROW:
3966 /* In case the vectorization factor (VF) is bigger than the number
3967 of elements that we can fit in a vectype (nunits), we have to
 3968 generate more than one vector stmt, i.e., we need to "unroll"
3969 the vector stmt by a factor VF/nunits. */
3970 for (j = 0; j < ncopies; j++)
3971 {
3972 /* Handle uses. */
3973 if (slp_node)
3974 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3975 slp_node, -1);
3976 else
3977 {
9771b263 3978 vec_oprnds0.truncate (0);
3979 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3980 vect_pow2 (multi_step_cvt) - 1);
3981 }
3982
3983 /* Arguments are ready. Create the new vector stmts. */
3984 if (cvt_type)
9771b263 3985 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3986 {
3987 if (codecvt1 == CALL_EXPR)
3988 {
3989 new_stmt = gimple_build_call (decl1, 1, vop0);
3990 new_temp = make_ssa_name (vec_dest, new_stmt);
3991 gimple_call_set_lhs (new_stmt, new_temp);
3992 }
3993 else
3994 {
3995 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 3996 new_temp = make_ssa_name (vec_dest);
3997 new_stmt = gimple_build_assign (new_temp, codecvt1,
3998 vop0);
4a00c761 3999 }
ebfd146a 4000
4a00c761 4001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4002 vec_oprnds0[i] = new_temp;
4a00c761 4003 }
ebfd146a 4004
4005 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4006 stmt, vec_dsts, gsi,
4007 slp_node, code1,
4008 &prev_stmt_info);
4009 }
4010
4011 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4012 break;
4013 }
4014
4015 vec_oprnds0.release ();
4016 vec_oprnds1.release ();
4017 vec_dsts.release ();
4018 interm_types.release ();
4019
4020 return true;
4021}
4022
4023
4024/* Function vectorizable_assignment.
4025
4026 Check if STMT performs an assignment (copy) that can be vectorized.
4027 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 4028 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4029 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4030
4031static bool
4032vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4033 gimple *vec_stmt, slp_tree slp_node)
4034{
4035 tree vec_dest;
4036 tree scalar_dest;
4037 tree op;
4038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4039 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4040 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4041 tree new_temp;
4042 tree def;
4043 gimple def_stmt;
4044 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 4045 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 4046 int ncopies;
f18b55bd 4047 int i, j;
6e1aa848 4048 vec<tree> vec_oprnds = vNULL;
ebfd146a 4049 tree vop;
a70d6342 4050 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4051 gimple new_stmt = NULL;
4052 stmt_vec_info prev_stmt_info = NULL;
4053 enum tree_code code;
4054 tree vectype_in;
4055
4056 /* Multiple types in SLP are handled by creating the appropriate number of
4057 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4058 case of SLP. */
437f4a00 4059 if (slp_node || PURE_SLP_STMT (stmt_info))
4060 ncopies = 1;
4061 else
4062 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4063
4064 gcc_assert (ncopies >= 1);
ebfd146a 4065
a70d6342 4066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4067 return false;
4068
8644a673 4069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4070 return false;
4071
4072 /* Is vectorizable assignment? */
4073 if (!is_gimple_assign (stmt))
4074 return false;
4075
4076 scalar_dest = gimple_assign_lhs (stmt);
4077 if (TREE_CODE (scalar_dest) != SSA_NAME)
4078 return false;
4079
fde9c428 4080 code = gimple_assign_rhs_code (stmt);
ebfd146a 4081 if (gimple_assign_single_p (stmt)
4082 || code == PAREN_EXPR
4083 || CONVERT_EXPR_CODE_P (code))
4084 op = gimple_assign_rhs1 (stmt);
4085 else
4086 return false;
4087
4088 if (code == VIEW_CONVERT_EXPR)
4089 op = TREE_OPERAND (op, 0);
4090
24ee1384 4091 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
fde9c428 4092 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a 4093 {
73fbfcad 4094 if (dump_enabled_p ())
78c60e3d 4095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4096 "use not simple.\n");
4097 return false;
4098 }
4099
4100 /* We can handle NOP_EXPR conversions that do not change the number
4101 of elements or the vector size. */
4102 if ((CONVERT_EXPR_CODE_P (code)
4103 || code == VIEW_CONVERT_EXPR)
4104 && (!vectype_in
4105 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4106 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4107 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4108 return false;
4109
4110 /* We do not handle bit-precision changes. */
4111 if ((CONVERT_EXPR_CODE_P (code)
4112 || code == VIEW_CONVERT_EXPR)
4113 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4114 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4115 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4116 || ((TYPE_PRECISION (TREE_TYPE (op))
4117 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4118 /* But a conversion that does not change the bit-pattern is ok. */
4119 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4120 > TYPE_PRECISION (TREE_TYPE (op)))
4121 && TYPE_UNSIGNED (TREE_TYPE (op))))
4122 {
73fbfcad 4123 if (dump_enabled_p ())
4124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4125 "type conversion to/from bit-precision "
e645e942 4126 "unsupported.\n");
4127 return false;
4128 }
4129
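/* Illustration of the bit-precision rule above (added, hedged): copying
   through a type whose precision does not fill its mode, e.g. the 3-bit
   field in

     struct s { unsigned int f : 3; };

   is rejected, because vector lanes always carry full-mode values.  A
   widening conversion from an unsigned narrow type is still accepted,
   since it only zero-extends and leaves the bit-pattern intact.  */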
4130 if (!vec_stmt) /* transformation not required. */
4131 {
4132 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4133 if (dump_enabled_p ())
78c60e3d 4134 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4135 "=== vectorizable_assignment ===\n");
c3e7ee41 4136 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4137 return true;
4138 }
4139
4140 /** Transform. **/
73fbfcad 4141 if (dump_enabled_p ())
e645e942 4142 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4143
4144 /* Handle def. */
4145 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4146
4147 /* Handle use. */
f18b55bd 4148 for (j = 0; j < ncopies; j++)
ebfd146a 4149 {
4150 /* Handle uses. */
4151 if (j == 0)
d092494c 4152 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4153 else
4154 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4155
4156 /* Arguments are ready. create the new vector stmt. */
9771b263 4157 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4158 {
4159 if (CONVERT_EXPR_CODE_P (code)
4160 || code == VIEW_CONVERT_EXPR)
4a73490d 4161 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4162 new_stmt = gimple_build_assign (vec_dest, vop);
4163 new_temp = make_ssa_name (vec_dest, new_stmt);
4164 gimple_assign_set_lhs (new_stmt, new_temp);
4165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4166 if (slp_node)
9771b263 4167 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4168 }
4169
4170 if (slp_node)
4171 continue;
4172
4173 if (j == 0)
4174 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4175 else
4176 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4177
4178 prev_stmt_info = vinfo_for_stmt (new_stmt);
4179 }
b8698a0f 4180
9771b263 4181 vec_oprnds.release ();
4182 return true;
4183}
4184
9dc3f7de 4185
4186/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
 4187 either as a shift by a scalar or by a vector. */
4188
4189bool
4190vect_supportable_shift (enum tree_code code, tree scalar_type)
4191{
4192
ef4bddc2 4193 machine_mode vec_mode;
4194 optab optab;
4195 int icode;
4196 tree vectype;
4197
4198 vectype = get_vectype_for_scalar_type (scalar_type);
4199 if (!vectype)
4200 return false;
4201
4202 optab = optab_for_tree_code (code, vectype, optab_scalar);
4203 if (!optab
4204 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4205 {
4206 optab = optab_for_tree_code (code, vectype, optab_vector);
4207 if (!optab
4208 || (optab_handler (optab, TYPE_MODE (vectype))
4209 == CODE_FOR_nothing))
4210 return false;
4211 }
4212
4213 vec_mode = TYPE_MODE (vectype);
4214 icode = (int) optab_handler (optab, vec_mode);
4215 if (icode == CODE_FOR_nothing)
4216 return false;
4217
4218 return true;
4219}
4220
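/* Hypothetical usage sketch (added): a caller such as pattern
   recognition can test a shift before committing to it, e.g.

     if (!vect_supportable_shift (LSHIFT_EXPR, TREE_TYPE (oprnd)))
       return NULL;

   The predicate accepts either the vector/scalar or the vector/vector
   optab, mirroring the two cases handled by vectorizable_shift below.  */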
4221
4222/* Function vectorizable_shift.
4223
4224 Check if STMT performs a shift operation that can be vectorized.
4225 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 4226 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4227 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4228
4229static bool
4230vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4231 gimple *vec_stmt, slp_tree slp_node)
4232{
4233 tree vec_dest;
4234 tree scalar_dest;
4235 tree op0, op1 = NULL;
4236 tree vec_oprnd1 = NULL_TREE;
4237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4238 tree vectype;
4239 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4240 enum tree_code code;
ef4bddc2 4241 machine_mode vec_mode;
4242 tree new_temp;
4243 optab optab;
4244 int icode;
ef4bddc2 4245 machine_mode optab_op2_mode;
4246 tree def;
4247 gimple def_stmt;
4248 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4249 gimple new_stmt = NULL;
4250 stmt_vec_info prev_stmt_info;
4251 int nunits_in;
4252 int nunits_out;
4253 tree vectype_out;
cede2577 4254 tree op1_vectype;
4255 int ncopies;
4256 int j, i;
6e1aa848
DN
4257 vec<tree> vec_oprnds0 = vNULL;
4258 vec<tree> vec_oprnds1 = vNULL;
4259 tree vop0, vop1;
4260 unsigned int k;
49eab32e 4261 bool scalar_shift_arg = true;
4262 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4263 int vf;
4264
4265 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4266 return false;
4267
4268 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4269 return false;
4270
4271 /* Is STMT a vectorizable binary/unary operation? */
4272 if (!is_gimple_assign (stmt))
4273 return false;
4274
4275 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4276 return false;
4277
4278 code = gimple_assign_rhs_code (stmt);
4279
4280 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4281 || code == RROTATE_EXPR))
4282 return false;
4283
4284 scalar_dest = gimple_assign_lhs (stmt);
4285 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4286 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4288 {
73fbfcad 4289 if (dump_enabled_p ())
78c60e3d 4290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4291 "bit-precision shifts not supported.\n");
4292 return false;
4293 }
4294
4295 op0 = gimple_assign_rhs1 (stmt);
24ee1384 4296 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4297 &def_stmt, &def, &dt[0], &vectype))
4298 {
73fbfcad 4299 if (dump_enabled_p ())
78c60e3d 4300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4301 "use not simple.\n");
4302 return false;
4303 }
 4304 /* If op0 is an external or constant def, use a vector type with
4305 the same size as the output vector type. */
4306 if (!vectype)
4307 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4308 if (vec_stmt)
4309 gcc_assert (vectype);
4310 if (!vectype)
4311 {
73fbfcad 4312 if (dump_enabled_p ())
78c60e3d 4313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4314 "no vectype for scalar type\n");
4315 return false;
4316 }
4317
4318 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4319 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4320 if (nunits_out != nunits_in)
4321 return false;
4322
4323 op1 = gimple_assign_rhs2 (stmt);
4324 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4325 &def, &dt[1], &op1_vectype))
9dc3f7de 4326 {
73fbfcad 4327 if (dump_enabled_p ())
78c60e3d 4328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4329 "use not simple.\n");
4330 return false;
4331 }
4332
4333 if (loop_vinfo)
4334 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4335 else
4336 vf = 1;
4337
4338 /* Multiple types in SLP are handled by creating the appropriate number of
4339 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4340 case of SLP. */
437f4a00 4341 if (slp_node || PURE_SLP_STMT (stmt_info))
4342 ncopies = 1;
4343 else
4344 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4345
4346 gcc_assert (ncopies >= 1);
4347
4348 /* Determine whether the shift amount is a vector, or scalar. If the
4349 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4350
4351 if (dt[1] == vect_internal_def && !slp_node)
4352 scalar_shift_arg = false;
4353 else if (dt[1] == vect_constant_def
4354 || dt[1] == vect_external_def
4355 || dt[1] == vect_internal_def)
4356 {
4357 /* In SLP, need to check whether the shift count is the same,
4358 in loops if it is a constant or invariant, it is always
4359 a scalar shift. */
4360 if (slp_node)
4361 {
9771b263 4362 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4363 gimple slpstmt;
4364
9771b263 4365 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4366 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4367 scalar_shift_arg = false;
4368 }
4369 }
4370 else
4371 {
73fbfcad 4372 if (dump_enabled_p ())
78c60e3d 4373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4374 "operand mode requires invariant argument.\n");
4375 return false;
4376 }
4377
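/* Illustration (added, hedged):

     a[i] = b[i] << c[i];    dt[1] == vect_internal_def:
                             a vector/vector shift is required
     a[i] = b[i] << n;       n constant or loop-invariant:
                             a scalar shift argument suffices

   For SLP the code above additionally checks that every statement in the
   group shifts by the same amount before using the scalar form.  */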
9dc3f7de 4378 /* Vector shifted by vector. */
49eab32e 4379 if (!scalar_shift_arg)
4380 {
4381 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 4382 if (dump_enabled_p ())
78c60e3d 4383 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4384 "vector/vector shift/rotate found.\n");
78c60e3d 4385
4386 if (!op1_vectype)
4387 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4388 if (op1_vectype == NULL_TREE
4389 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 4390 {
73fbfcad 4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4393 "unusable type for last operand in"
e645e942 4394 " vector/vector shift/rotate.\n");
4395 return false;
4396 }
4397 }
4398 /* See if the machine has a vector shifted by scalar insn and if not
4399 then see if it has a vector shifted by vector insn. */
49eab32e 4400 else
4401 {
4402 optab = optab_for_tree_code (code, vectype, optab_scalar);
4403 if (optab
4404 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4405 {
73fbfcad 4406 if (dump_enabled_p ())
78c60e3d 4407 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4408 "vector/scalar shift/rotate found.\n");
4409 }
4410 else
4411 {
4412 optab = optab_for_tree_code (code, vectype, optab_vector);
4413 if (optab
4414 && (optab_handler (optab, TYPE_MODE (vectype))
4415 != CODE_FOR_nothing))
4416 {
4417 scalar_shift_arg = false;
4418
73fbfcad 4419 if (dump_enabled_p ())
78c60e3d 4420 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4421 "vector/vector shift/rotate found.\n");
4422
 4423 /* Unlike the other binary operators, shifts/rotates take an int
 4424 rhs rather than one of the same type as the lhs, so make
 4425 sure the scalar shift amount has the right type if we are
aa948027 4426 dealing with vectors of long long/long/short/char. */
4427 if (dt[1] == vect_constant_def)
4428 op1 = fold_convert (TREE_TYPE (vectype), op1);
4429 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4430 TREE_TYPE (op1)))
4431 {
4432 if (slp_node
4433 && TYPE_MODE (TREE_TYPE (vectype))
4434 != TYPE_MODE (TREE_TYPE (op1)))
4435 {
73fbfcad 4436 if (dump_enabled_p ())
4437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4438 "unusable type for last operand in"
e645e942 4439 " vector/vector shift/rotate.\n");
4440 return false;
4441 }
4442 if (vec_stmt && !slp_node)
4443 {
4444 op1 = fold_convert (TREE_TYPE (vectype), op1);
4445 op1 = vect_init_vector (stmt, op1,
4446 TREE_TYPE (vectype), NULL);
4447 }
4448 }
4449 }
4450 }
4451 }
4452
4453 /* Supportable by target? */
4454 if (!optab)
4455 {
73fbfcad 4456 if (dump_enabled_p ())
78c60e3d 4457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4458 "no optab.\n");
4459 return false;
4460 }
4461 vec_mode = TYPE_MODE (vectype);
4462 icode = (int) optab_handler (optab, vec_mode);
4463 if (icode == CODE_FOR_nothing)
4464 {
73fbfcad 4465 if (dump_enabled_p ())
78c60e3d 4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4467 "op not supported by target.\n");
4468 /* Check only during analysis. */
4469 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4470 || (vf < vect_min_worthwhile_factor (code)
4471 && !vec_stmt))
4472 return false;
73fbfcad 4473 if (dump_enabled_p ())
4474 dump_printf_loc (MSG_NOTE, vect_location,
4475 "proceeding using word mode.\n");
4476 }
4477
4478 /* Worthwhile without SIMD support? Check only during analysis. */
4479 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4480 && vf < vect_min_worthwhile_factor (code)
4481 && !vec_stmt)
4482 {
73fbfcad 4483 if (dump_enabled_p ())
78c60e3d 4484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4485 "not worthwhile without SIMD support.\n");
4486 return false;
4487 }
4488
4489 if (!vec_stmt) /* transformation not required. */
4490 {
4491 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 4492 if (dump_enabled_p ())
4493 dump_printf_loc (MSG_NOTE, vect_location,
4494 "=== vectorizable_shift ===\n");
c3e7ee41 4495 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4496 return true;
4497 }
4498
4499 /** Transform. **/
4500
73fbfcad 4501 if (dump_enabled_p ())
78c60e3d 4502 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4503 "transform binary/unary operation.\n");
4504
4505 /* Handle def. */
4506 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4507
4508 prev_stmt_info = NULL;
4509 for (j = 0; j < ncopies; j++)
4510 {
4511 /* Handle uses. */
4512 if (j == 0)
4513 {
4514 if (scalar_shift_arg)
4515 {
4516 /* Vector shl and shr insn patterns can be defined with scalar
 4517 operand 2 (the shift operand). In this case, use the constant or
 4518 loop-invariant op1 directly, without extending it to vector mode
4519 first. */
4520 optab_op2_mode = insn_data[icode].operand[2].mode;
4521 if (!VECTOR_MODE_P (optab_op2_mode))
4522 {
73fbfcad 4523 if (dump_enabled_p ())
78c60e3d 4524 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4525 "operand 1 using scalar mode.\n");
9dc3f7de 4526 vec_oprnd1 = op1;
8930f723 4527 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 4528 vec_oprnds1.quick_push (vec_oprnd1);
4529 if (slp_node)
4530 {
4531 /* Store vec_oprnd1 for every vector stmt to be created
4532 for SLP_NODE. We check during the analysis that all
4533 the shift arguments are the same.
4534 TODO: Allow different constants for different vector
4535 stmts generated for an SLP instance. */
4536 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4537 vec_oprnds1.quick_push (vec_oprnd1);
4538 }
4539 }
4540 }
4541
4542 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
 4543 (a special case for certain kinds of vector shifts); otherwise,
4544 operand 1 should be of a vector type (the usual case). */
4545 if (vec_oprnd1)
4546 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 4547 slp_node, -1);
4548 else
4549 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 4550 slp_node, -1);
4551 }
4552 else
4553 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4554
4555 /* Arguments are ready. Create the new vector stmt. */
9771b263 4556 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 4557 {
9771b263 4558 vop1 = vec_oprnds1[i];
0d0e4a03 4559 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4560 new_temp = make_ssa_name (vec_dest, new_stmt);
4561 gimple_assign_set_lhs (new_stmt, new_temp);
4562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4563 if (slp_node)
9771b263 4564 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4565 }
4566
4567 if (slp_node)
4568 continue;
4569
4570 if (j == 0)
4571 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4572 else
4573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4574 prev_stmt_info = vinfo_for_stmt (new_stmt);
4575 }
4576
4577 vec_oprnds0.release ();
4578 vec_oprnds1.release ();
4579
4580 return true;
4581}
4582
4583
4584/* Function vectorizable_operation.
4585
4586 Check if STMT performs a binary, unary or ternary operation that can
4587 be vectorized.
b8698a0f 4588 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 4589 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4590 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4591
4592static bool
4593vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4594 gimple *vec_stmt, slp_tree slp_node)
4595{
00f07b86 4596 tree vec_dest;
ebfd146a 4597 tree scalar_dest;
16949072 4598 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 4599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 4600 tree vectype;
4601 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4602 enum tree_code code;
ef4bddc2 4603 machine_mode vec_mode;
4604 tree new_temp;
4605 int op_type;
00f07b86 4606 optab optab;
ebfd146a 4607 int icode;
4608 tree def;
4609 gimple def_stmt;
4610 enum vect_def_type dt[3]
4611 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4612 gimple new_stmt = NULL;
4613 stmt_vec_info prev_stmt_info;
b690cc0f 4614 int nunits_in;
4615 int nunits_out;
4616 tree vectype_out;
4617 int ncopies;
4618 int j, i;
4619 vec<tree> vec_oprnds0 = vNULL;
4620 vec<tree> vec_oprnds1 = vNULL;
4621 vec<tree> vec_oprnds2 = vNULL;
16949072 4622 tree vop0, vop1, vop2;
4623 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4624 int vf;
4625
a70d6342 4626 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4627 return false;
4628
8644a673 4629 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4630 return false;
4631
4632 /* Is STMT a vectorizable binary/unary operation? */
4633 if (!is_gimple_assign (stmt))
4634 return false;
4635
4636 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4637 return false;
4638
4639 code = gimple_assign_rhs_code (stmt);
4640
4641 /* For pointer addition, we should use the normal plus for
4642 the vector addition. */
4643 if (code == POINTER_PLUS_EXPR)
4644 code = PLUS_EXPR;
4645
 4646 /* Support only unary, binary, or ternary operations. */
4647 op_type = TREE_CODE_LENGTH (code);
16949072 4648 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 4649 {
73fbfcad 4650 if (dump_enabled_p ())
78c60e3d 4651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4652 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 4653 op_type);
4654 return false;
4655 }
4656
4657 scalar_dest = gimple_assign_lhs (stmt);
4658 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4659
4660 /* Most operations cannot handle bit-precision types without extra
4661 truncations. */
4662 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4663 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4664 /* Exception are bitwise binary operations. */
4665 && code != BIT_IOR_EXPR
4666 && code != BIT_XOR_EXPR
4667 && code != BIT_AND_EXPR)
4668 {
73fbfcad 4669 if (dump_enabled_p ())
78c60e3d 4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4671 "bit-precision arithmetic not supported.\n");
4672 return false;
4673 }
4674
ebfd146a 4675 op0 = gimple_assign_rhs1 (stmt);
24ee1384 4676 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 4677 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 4678 {
73fbfcad 4679 if (dump_enabled_p ())
78c60e3d 4680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4681 "use not simple.\n");
4682 return false;
4683 }
 4684 /* If op0 is an external or constant def, use a vector type with
4685 the same size as the output vector type. */
4686 if (!vectype)
4687 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4688 if (vec_stmt)
4689 gcc_assert (vectype);
4690 if (!vectype)
4691 {
73fbfcad 4692 if (dump_enabled_p ())
7d8930a0 4693 {
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4695 "no vectype for scalar type ");
4696 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4697 TREE_TYPE (op0));
e645e942 4698 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4699 }
4700
4701 return false;
4702 }
4703
4704 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4705 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4706 if (nunits_out != nunits_in)
4707 return false;
ebfd146a 4708
16949072 4709 if (op_type == binary_op || op_type == ternary_op)
4710 {
4711 op1 = gimple_assign_rhs2 (stmt);
4712 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4713 &def, &dt[1]))
ebfd146a 4714 {
73fbfcad 4715 if (dump_enabled_p ())
78c60e3d 4716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4717 "use not simple.\n");
4718 return false;
4719 }
4720 }
4721 if (op_type == ternary_op)
4722 {
4723 op2 = gimple_assign_rhs3 (stmt);
4724 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4725 &def, &dt[2]))
16949072 4726 {
73fbfcad 4727 if (dump_enabled_p ())
78c60e3d 4728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4729 "use not simple.\n");
4730 return false;
4731 }
4732 }
ebfd146a 4733
4734 if (loop_vinfo)
4735 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4736 else
4737 vf = 1;
4738
4739 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4740 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 4741 case of SLP. */
437f4a00 4742 if (slp_node || PURE_SLP_STMT (stmt_info))
4743 ncopies = 1;
4744 else
4745 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4746
4747 gcc_assert (ncopies >= 1);
4748
9dc3f7de 4749 /* Shifts are handled in vectorizable_shift (). */
4750 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4751 || code == RROTATE_EXPR)
9dc3f7de 4752 return false;
ebfd146a 4753
ebfd146a 4754 /* Supportable by target? */
4755
4756 vec_mode = TYPE_MODE (vectype);
4757 if (code == MULT_HIGHPART_EXPR)
ebfd146a 4758 {
00f07b86 4759 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 4760 icode = LAST_INSN_CODE;
4761 else
4762 icode = CODE_FOR_nothing;
ebfd146a 4763 }
4764 else
4765 {
4766 optab = optab_for_tree_code (code, vectype, optab_default);
4767 if (!optab)
5deb57cb 4768 {
73fbfcad 4769 if (dump_enabled_p ())
78c60e3d 4770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4771 "no optab.\n");
00f07b86 4772 return false;
5deb57cb 4773 }
00f07b86 4774 icode = (int) optab_handler (optab, vec_mode);
4775 }
4776
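/* MULT_HIGHPART_EXPR computes the high half of a widened product; the
   scalar equivalent for 32-bit int is (illustration, added):

     int mulh (int a, int b)
     {
       return (int) (((long long) a * (long long) b) >> 32);
     }

   can_mult_highpart_p asks whether the target can do this per vector
   lane without materializing the widened product.  */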
4777 if (icode == CODE_FOR_nothing)
4778 {
73fbfcad 4779 if (dump_enabled_p ())
78c60e3d 4780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4781 "op not supported by target.\n");
4782 /* Check only during analysis. */
4783 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 4784 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 4785 return false;
73fbfcad 4786 if (dump_enabled_p ())
e645e942
TJ
4787 dump_printf_loc (MSG_NOTE, vect_location,
4788 "proceeding using word mode.\n");
4789 }
4790
4a00c761 4791 /* Worthwhile without SIMD support? Check only during analysis. */
4792 if (!VECTOR_MODE_P (vec_mode)
4793 && !vec_stmt
4794 && vf < vect_min_worthwhile_factor (code))
7d8930a0 4795 {
73fbfcad 4796 if (dump_enabled_p ())
78c60e3d 4797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4798 "not worthwhile without SIMD support.\n");
e34842c6 4799 return false;
7d8930a0 4800 }
ebfd146a 4801
4802 if (!vec_stmt) /* transformation not required. */
4803 {
4a00c761 4804 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 4805 if (dump_enabled_p ())
78c60e3d 4806 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4807 "=== vectorizable_operation ===\n");
c3e7ee41 4808 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4809 return true;
4810 }
4811
4812 /** Transform. **/
4813
73fbfcad 4814 if (dump_enabled_p ())
78c60e3d 4815 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4816 "transform binary/unary operation.\n");
383d9c83 4817
ebfd146a 4818 /* Handle def. */
00f07b86 4819 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 4820
4821 /* In case the vectorization factor (VF) is bigger than the number
4822 of elements that we can fit in a vectype (nunits), we have to generate
 4823 more than one vector stmt, i.e., we need to "unroll" the
4824 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4825 from one copy of the vector stmt to the next, in the field
4826 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4827 stages to find the correct vector defs to be used when vectorizing
4828 stmts that use the defs of the current stmt. The example below
4829 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4830 we need to create 4 vectorized stmts):
4831
4832 before vectorization:
4833 RELATED_STMT VEC_STMT
4834 S1: x = memref - -
4835 S2: z = x + 1 - -
4836
4837 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4838 there):
4839 RELATED_STMT VEC_STMT
4840 VS1_0: vx0 = memref0 VS1_1 -
4841 VS1_1: vx1 = memref1 VS1_2 -
4842 VS1_2: vx2 = memref2 VS1_3 -
4843 VS1_3: vx3 = memref3 - -
4844 S1: x = load - VS1_0
4845 S2: z = x + 1 - -
4846
4847 step2: vectorize stmt S2 (done here):
4848 To vectorize stmt S2 we first need to find the relevant vector
4849 def for the first operand 'x'. This is, as usual, obtained from
4850 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4851 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4852 relevant vector def 'vx0'. Having found 'vx0' we can generate
4853 the vector stmt VS2_0, and as usual, record it in the
4854 STMT_VINFO_VEC_STMT of stmt S2.
4855 When creating the second copy (VS2_1), we obtain the relevant vector
4856 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4857 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4858 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4859 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4860 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4861 chain of stmts and pointers:
4862 RELATED_STMT VEC_STMT
4863 VS1_0: vx0 = memref0 VS1_1 -
4864 VS1_1: vx1 = memref1 VS1_2 -
4865 VS1_2: vx2 = memref2 VS1_3 -
4866 VS1_3: vx3 = memref3 - -
4867 S1: x = load - VS1_0
4868 VS2_0: vz0 = vx0 + v1 VS2_1 -
4869 VS2_1: vz1 = vx1 + v1 VS2_2 -
4870 VS2_2: vz2 = vx2 + v1 VS2_3 -
4871 VS2_3: vz3 = vx3 + v1 - -
4872 S2: z = x + 1 - VS2_0 */
4873
4874 prev_stmt_info = NULL;
4875 for (j = 0; j < ncopies; j++)
4876 {
4877 /* Handle uses. */
4878 if (j == 0)
4879 {
4880 if (op_type == binary_op || op_type == ternary_op)
4881 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4882 slp_node, -1);
4883 else
4884 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4885 slp_node, -1);
4886 if (op_type == ternary_op)
36ba4aae 4887 {
4888 vec_oprnds2.create (1);
4889 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4890 stmt,
4891 NULL));
36ba4aae 4892 }
4a00c761 4893 }
ebfd146a 4894 else
4895 {
4896 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4897 if (op_type == ternary_op)
4898 {
4899 tree vec_oprnd = vec_oprnds2.pop ();
4900 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4901 vec_oprnd));
4902 }
4903 }
4904
4905 /* Arguments are ready. Create the new vector stmt. */
9771b263 4906 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 4907 {
4a00c761 4908 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 4909 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 4910 vop2 = ((op_type == ternary_op)
9771b263 4911 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 4912 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
4913 new_temp = make_ssa_name (vec_dest, new_stmt);
4914 gimple_assign_set_lhs (new_stmt, new_temp);
4915 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4916 if (slp_node)
9771b263 4917 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
4918 }
4919
4a00c761
JJ
4920 if (slp_node)
4921 continue;
4922
4923 if (j == 0)
4924 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4925 else
4926 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4927 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
4928 }
4929
9771b263
DN
4930 vec_oprnds0.release ();
4931 vec_oprnds1.release ();
4932 vec_oprnds2.release ();
ebfd146a 4933
ebfd146a
IR
4934 return true;
4935}
4936
c716e67f
XDL
4937/* A helper function to ensure data reference DR's base alignment
4938 for STMT_INFO. */
4939
4940static void
4941ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4942{
4943 if (!dr->aux)
4944 return;
4945
4946 if (((dataref_aux *)dr->aux)->base_misaligned)
4947 {
4948 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4949 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4950
428f0c67
JH
4951 if (decl_in_symtab_p (base_decl))
4952 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4953 else
4954 {
4955 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4956 DECL_USER_ALIGN (base_decl) = 1;
4957 }
c716e67f
XDL
4958 ((dataref_aux *)dr->aux)->base_misaligned = false;
4959 }
4960}
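
/* Illustrative example, not from the sources: if a loop over a global
   'static float a[256];' (assume 4-byte default alignment on some
   hypothetical target) is vectorized with a V4SF vectype, the helper
   above raises the decl's alignment to TYPE_ALIGN (V4SF), typically
   128 bits, so 'a' can be accessed with aligned vector loads/stores,
   and sets DECL_USER_ALIGN so later passes treat the alignment as
   fixed rather than shrinking it again.  */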

/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
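
/* Illustrative example, not from the sources: for a 4-element vector the
   selector built above is {3, 2, 1, 0}, so

     VEC_PERM_EXPR <v, v, {3, 2, 1, 0}>

   turns {a, b, c, d} into {d, c, b, a}.  The same selector computed
   stand-alone:

     unsigned char sel[4];
     for (int i = 0; i < 4; ++i)
       sel[i] = 4 - 1 - i;   /* sel == {3, 2, 1, 0}  */
*/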

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  bool negative = false;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative =
    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                          ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                          size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return false;
    }

  if (negative)
    {
      gcc_assert (!grouped_store);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
          && alignment_support_scheme != dr_unaligned_supported)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step but alignment required.\n");
          return false;
        }
      if (dt != vect_constant_def
          && dt != vect_external_def
          && !perm_mask_for_reverse (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step and reversing not supported.\n");
          return false;
        }
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.\n");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                               NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see the
     documentation in vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from the base address (the access
     of the first stmt in the chain, S2 in the above example) when the last
     store stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
  */
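
  /* Illustrative example, not from the sources: with two 8-element input
     vectors vx0 = {a0,...,a7} and vx3 = {b0,...,b7}, the masks above give

       VS5: {a0, b0, a1, b1, a2, b2, a3, b3}   selector {0, 8, 1, 9, 2, 10, 3, 11}
       VS6: {a4, b4, a5, b5, a6, b6, a7, b7}   selector {4, 12, 5, 13, 6, 14, 7, 15}

     i.e. selector values 0-7 index the first operand and 8-15 the second,
     which is exactly the interleave needed to lay the two stored arrays
     out contiguously in memory.  */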

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (negative
                  && dt != vect_constant_def
                  && dt != vect_external_def)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest);

                  /* Generate the permute statement.  */
                  gimple perm_stmt
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
{
  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
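
/* Illustrative usage sketch, not from the sources: extracting the
   even-indexed elements of a pair of 4-element vectors.  Selector values
   0-3 pick from the first VEC_PERM_EXPR operand and 4-7 from the second:

     unsigned char sel[4] = { 0, 2, 4, 6 };
     if (can_vec_perm_p (TYPE_MODE (vectype), false, sel))
       {
         tree mask = vect_gen_perm_mask_checked (vectype, sel);
         /* ... feed MASK to a VEC_PERM_EXPR <v0, v1, mask> ...  */
       }
*/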

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
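
/* Illustrative example, not from the sources: given a loop like

     for (i = 0; i < n; i++)
       {
         t = a + b;        // defined inside the loop, but loop-invariant
         x[i] = *(p + t);
       }

   hoist_defs_of_uses applied to the load moves the definition
   't = a + b' onto the preheader edge (a and b are defined outside the
   loop), after which the load that uses it can itself be moved out.
   A def that is a PHI, or that in turn depends on another in-loop def,
   makes the function give up and return false.  */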

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

      /* If this is single-element interleaving with an element distance
         that leaves unused vector loads around punt - we at least create
         very sub-optimal code in that case (and blow up memory,
         see PR65518).  */
      if (first_stmt == stmt
          && !GROUP_NEXT_ELEMENT (stmt_info)
          && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "single-element interleaving not supported "
                             "for not adjacent vector loads\n");
          return false;
        }

      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }

      /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
          && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
          && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "cannot perform implicit CSE when performing "
                             "group loads with negative dependence distance\n");
          return false;
        }

      /* Similarly when the stmt is a load that is both part of a SLP
         instance and a loop vectorized stmt via the same-dr mechanism
         we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
          && (STMT_SLP_TYPE (stmt_info)
              != STMT_SLP_TYPE (vinfo_for_stmt
                                 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "conflicting SLP types for CSEd load\n");
          return false;
        }
    }


  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported.\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
                              NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        mask = build_int_cst (masktype, -1);
      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        {
          mask = build_int_cst (TREE_TYPE (masktype), -1);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else
        gcc_unreachable ();

      scale = build_int_cst (scaletype, gather_scale);

      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
        merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = 0;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
          merge = build_real (TREE_TYPE (rettype), r);
        }
      else
        gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
       */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
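
  /* Illustrative example, not from the sources: with nunits == 4 and a
     runtime stride of 3, one copy of the code generated above loads

       tmp1 = array[j];  tmp2 = array[j+3];  tmp3 = array[j+6];  tmp4 = array[j+9];
       vectemp = {tmp1, tmp2, tmp3, tmp4};   // CONSTRUCTOR of the vectype

     and the induction variable is then advanced by VF*stride == 12
     elements for the next vector iteration.  */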
aec7ae7d 6175
0d0293ac 6176 if (grouped_load)
ebfd146a 6177 {
e14c1050 6178 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 6179 if (slp
01d8bf07 6180 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
6181 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6182 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 6183
ebfd146a 6184 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
6185 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6186 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6187 ??? But we can only do so if there is exactly one
6188 as we have no way to get at the rest. Leave the CSE
6189 opportunity alone.
6190 ??? With the group load eventually participating
6191 in multiple different permutations (having multiple
6192 slp nodes which refer to the same group) the CSE
6193 is even wrong code. See PR56270. */
6194 && !slp)
ebfd146a
IR
6195 {
6196 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6197 return true;
6198 }
6199 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6200 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
6201
6202 /* VEC_NUM is the number of vect stmts to be created for this group. */
6203 if (slp)
6204 {
0d0293ac 6205 grouped_load = false;
ebfd146a 6206 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 6207 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 6208 slp_perm = true;
a64b9c26 6209 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 6210 }
ebfd146a 6211 else
a64b9c26
RB
6212 {
6213 vec_num = group_size;
6214 group_gap = 0;
6215 }
ebfd146a
IR
6216 }
6217 else
6218 {
6219 first_stmt = stmt;
6220 first_dr = dr;
6221 group_size = vec_num = 1;
a64b9c26 6222 group_gap = 0;
ebfd146a
IR
6223 }
6224
720f5239 6225 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6226 gcc_assert (alignment_support_scheme);
272c6793
RS
6227 /* Targets with load-lane instructions must not require explicit
6228 realignment. */
6229 gcc_assert (!load_lanes_p
6230 || alignment_support_scheme == dr_aligned
6231 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
6232
6233 /* In case the vectorization factor (VF) is bigger than the number
6234 of elements that we can fit in a vectype (nunits), we have to generate
6235 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 6236 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 6237 from one copy of the vector stmt to the next, in the field
ff802fa1 6238 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 6239 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
6240 stmts that use the defs of the current stmt. The example below
6241 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6242 need to create 4 vectorized stmts):
ebfd146a
IR
6243
6244 before vectorization:
6245 RELATED_STMT VEC_STMT
6246 S1: x = memref - -
6247 S2: z = x + 1 - -
6248
6249 step 1: vectorize stmt S1:
6250 We first create the vector stmt VS1_0, and, as usual, record a
6251 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6252 Next, we create the vector stmt VS1_1, and record a pointer to
6253 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 6254 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
6255 stmts and pointers:
6256 RELATED_STMT VEC_STMT
6257 VS1_0: vx0 = memref0 VS1_1 -
6258 VS1_1: vx1 = memref1 VS1_2 -
6259 VS1_2: vx2 = memref2 VS1_3 -
6260 VS1_3: vx3 = memref3 - -
6261 S1: x = load - VS1_0
6262 S2: z = x + 1 - -
6263
b8698a0f
L
6264 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6265 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
6266 stmt S2. */
6267
0d0293ac 6268 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6269
6270 S1: x2 = &base + 2
6271 S2: x0 = &base
6272 S3: x1 = &base + 1
6273 S4: x3 = &base + 3
6274
b8698a0f 6275 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
6276 starting from the access of the first stmt of the chain:
6277
6278 VS1: vx0 = &base
6279 VS2: vx1 = &base + vec_size*1
6280 VS3: vx3 = &base + vec_size*2
6281 VS4: vx4 = &base + vec_size*3
6282
6283 Then permutation statements are generated:
6284
e2c83630
RH
6285 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6286 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
6287 ...
6288
6289 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6290 (the order of the data-refs in the output of vect_permute_load_chain
6291 corresponds to the order of scalar stmts in the interleaving chain - see
6292 the documentation of vect_permute_load_chain()).
6293 The generation of permutation stmts and recording them in
0d0293ac 6294 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 6295
b8698a0f 6296 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
6297 permutation stmts above are created for every copy. The result vector
6298 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6299 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
6300
6301 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6302 on a target that supports unaligned accesses (dr_unaligned_supported)
6303 we generate the following code:
6304 p = initial_addr;
6305 indx = 0;
6306 loop {
6307 p = p + indx * vectype_size;
6308 vec_dest = *(p);
6309 indx = indx + 1;
6310 }
6311
6312 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 6313 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
6314 then generate the following code, in which the data in each iteration is
6315 obtained by two vector loads, one from the previous iteration, and one
6316 from the current iteration:
6317 p1 = initial_addr;
6318 msq_init = *(floor(p1))
6319 p2 = initial_addr + VS - 1;
6320 realignment_token = call target_builtin;
6321 indx = 0;
6322 loop {
6323 p2 = p2 + indx * vectype_size
6324 lsq = *(floor(p2))
6325 vec_dest = realign_load (msq, lsq, realignment_token)
6326 indx = indx + 1;
6327 msq = lsq;
6328 } */
6329
6330 /* If the misalignment remains the same throughout the execution of the
6331 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 6332 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
6333 This can only occur when vectorizing memory accesses in the inner-loop
6334 nested within an outer-loop that is being vectorized. */
6335
d1e4b493 6336 if (nested_in_vect_loop
211bea38 6337 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
6338 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6339 {
6340 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6341 compute_in_loop = true;
6342 }
6343
6344 if ((alignment_support_scheme == dr_explicit_realign_optimized
6345 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 6346 && !compute_in_loop)
ebfd146a
IR
6347 {
6348 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6349 alignment_support_scheme, NULL_TREE,
6350 &at_loop);
6351 if (alignment_support_scheme == dr_explicit_realign_optimized)
6352 {
538dd0b7 6353 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
6354 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6355 size_one_node);
ebfd146a
IR
6356 }
6357 }
6358 else
6359 at_loop = loop;
6360
a1e53f3f
L
6361 if (negative)
6362 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6363
272c6793
RS
6364 if (load_lanes_p)
6365 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6366 else
6367 aggr_type = vectype;
6368
ebfd146a
IR
6369 prev_stmt_info = NULL;
6370 for (j = 0; j < ncopies; j++)
b8698a0f 6371 {
272c6793 6372 /* 1. Create the vector or array pointer update chain. */
ebfd146a 6373 if (j == 0)
74bf76ed
JJ
6374 {
6375 bool simd_lane_access_p
6376 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6377 if (simd_lane_access_p
6378 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6379 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6380 && integer_zerop (DR_OFFSET (first_dr))
6381 && integer_zerop (DR_INIT (first_dr))
6382 && alias_sets_conflict_p (get_alias_set (aggr_type),
6383 get_alias_set (DR_REF (first_dr)))
6384 && (alignment_support_scheme == dr_aligned
6385 || alignment_support_scheme == dr_unaligned_supported))
6386 {
6387 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6388 dataref_offset = build_int_cst (reference_alias_ptr_type
6389 (DR_REF (first_dr)), 0);
8928eff3 6390 inv_p = false;
74bf76ed
JJ
6391 }
6392 else
6393 dataref_ptr
6394 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6395 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
6396 simd_lane_access_p, &inv_p,
6397 byte_offset);
74bf76ed
JJ
6398 }
6399 else if (dataref_offset)
6400 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6401 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6402 else
272c6793
RS
6403 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6404 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6405
0d0293ac 6406 if (grouped_load || slp_perm)
9771b263 6407 dr_chain.create (vec_num);
5ce1ee7f 6408
272c6793 6409 if (load_lanes_p)
ebfd146a 6410 {
272c6793
RS
6411 tree vec_array;
6412
6413 vec_array = create_vector_array (vectype, vec_num);
6414
6415 /* Emit:
6416 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6417 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6418 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6419 gimple_call_set_lhs (new_stmt, vec_array);
6420 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 6421
272c6793
RS
6422 /* Extract each vector into an SSA_NAME. */
6423 for (i = 0; i < vec_num; i++)
ebfd146a 6424 {
272c6793
RS
6425 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6426 vec_array, i);
9771b263 6427 dr_chain.quick_push (new_temp);
272c6793
RS
6428 }
6429
6430 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 6431 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
6432 }
6433 else
6434 {
6435 for (i = 0; i < vec_num; i++)
6436 {
6437 if (i > 0)
6438 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6439 stmt, NULL_TREE);
6440
6441 /* 2. Create the vector-load in the loop. */
6442 switch (alignment_support_scheme)
6443 {
6444 case dr_aligned:
6445 case dr_unaligned_supported:
be1ac4ec 6446 {
644ffefd
MJ
6447 unsigned int align, misalign;
6448
272c6793
RS
6449 data_ref
6450 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
6451 dataref_offset
6452 ? dataref_offset
6453 : build_int_cst (reference_alias_ptr_type
6454 (DR_REF (first_dr)), 0));
644ffefd 6455 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
6456 if (alignment_support_scheme == dr_aligned)
6457 {
6458 gcc_assert (aligned_access_p (first_dr));
644ffefd 6459 misalign = 0;
272c6793
RS
6460 }
6461 else if (DR_MISALIGNMENT (first_dr) == -1)
6462 {
6463 TREE_TYPE (data_ref)
6464 = build_aligned_type (TREE_TYPE (data_ref),
6465 TYPE_ALIGN (elem_type));
644ffefd
MJ
6466 align = TYPE_ALIGN_UNIT (elem_type);
6467 misalign = 0;
272c6793
RS
6468 }
6469 else
6470 {
6471 TREE_TYPE (data_ref)
6472 = build_aligned_type (TREE_TYPE (data_ref),
6473 TYPE_ALIGN (elem_type));
644ffefd 6474 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6475 }
74bf76ed
JJ
6476 if (dataref_offset == NULL_TREE)
6477 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6478 align, misalign);
272c6793 6479 break;
be1ac4ec 6480 }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;

		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    ptr = copy_ssa_name (dataref_ptr);
		    new_stmt = gimple_build_assign
				 (ptr, BIT_AND_EXPR, dataref_ptr,
				  build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs,
				       TYPE_SIZE_UNIT (elem_type));
		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign
				 (NULL_TREE, BIT_AND_EXPR, ptr,
				  build_int_cst
				  (TREE_TYPE (ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  new_temp = copy_ssa_name (dataref_ptr);
		  new_stmt = gimple_build_assign
			       (new_temp, BIT_AND_EXPR, dataref_ptr,
				build_int_cst
				(TREE_TYPE (dataref_ptr),
				 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}

	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gcc_assert (!grouped_load);
		  /* If we have versioned for aliasing or the loop doesn't
		     have any data dependencies that would preclude this,
		     then we are sure this is a loop invariant load and
		     thus we can insert it on the preheader edge.  */
		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt, loop))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "hoisting out of the vectorized "
					   "loop: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
			}
		      tree tem = copy_ssa_name (scalar_dest);
		      gsi_insert_on_edge_immediate
			(loop_preheader_edge (loop),
			 gimple_build_assign (tem,
					      unshare_expr
						(gimple_assign_rhs1 (stmt))));
		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
		    }
		  else
		    {
		      gimple_stmt_iterator gsi2 = *gsi;
		      gsi_next (&gsi2);
		      new_temp = vect_init_vector (stmt, scalar_dest,
						   vectype, &gsi2);
		    }
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo,
							 bb_vinfo));
		}

	      if (negative)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }
	  /* Bump the vector pointer to account for a gap.  */
	  if (slp && group_gap != 0)
	    {
	      tree bump = size_binop (MULT_EXPR,
				      TYPE_SIZE_UNIT (elem_type),
				      size_int (group_gap));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (!load_lanes_p)
		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   COND - the condition that is checked for simple use.
   STMT - the statement COND appears in.
   LOOP_VINFO / BB_VINFO - the vect info of the loop or basic block
   that is being vectorized.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

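/* For example, for a condition like "a_5 < b_7" each of a_5 and b_7 must
   be an SSA_NAME with a vectorizable definition or an invariant constant;
   *COMP_VECTYPE is taken from whichever operand supplies a vector type.  */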
static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
		     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				 &lhs_def_stmt, &def, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
				 &rhs_def_stmt, &def, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

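/* As an illustration, a scalar statement

     x_5 = a_2 < b_3 ? c_4 : d_6;

   is replaced by a vector statement of the form

     vx_5 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_6>;

   where the v* names stand for the vector defs of the operands.  */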
bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  /* Is this a vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
			    &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be a signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

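  /* E.g. for a V4SF vectype the comparison feeding the VEC_COND_EXPR is
     built in a same-sized signed integer vector type such as V4SI, since
     vector comparisons produce integer masks.  */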
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
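  /* One iteration of the loop below is emitted per vector copy: for the
     first copy the vector defs of the operands are obtained from the
     scalar operands (or from the SLP node), and for subsequent copies
     from the defs of the previous copy.  */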
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      ops.safe_push (TREE_OPERAND (cond_expr, 0));
	      ops.safe_push (TREE_OPERAND (cond_expr, 1));
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();

	      ops.release ();
	      vec_defs.release ();
	    }
	  else
	    {
	      gimple gtemp;
	      vec_cond_lhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

	      vec_cond_rhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt, NULL);
		  vect_is_simple_use (then_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt, NULL);
		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_oprnds0.pop ());
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_oprnds1.pop ());
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_cond_rhs = vec_oprnds1[i];
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
				vec_cond_lhs, vec_cond_rhs);
	  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
				  vec_compare, vec_then_clause,
				  vec_else_clause);

	  new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}


/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; don't analyze pattern stmts instead, as the pattern
     stmts will already be part of an SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
		  || relevance == vect_used_in_outer_by_reduction
		  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

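  /* Dispatch over the vectorizable_* routines in analysis mode (NULL
     instead of a gsi and vec_stmt); each returns true only if it
     recognizes and supports the statement, so exactly one of them is
     expected to accept a vectorizable statement.  */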
  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
	  || vectorizable_conversion (stmt, NULL, NULL, NULL)
	  || vectorizable_shift (stmt, NULL, NULL, NULL)
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

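  /* Dispatch on the operation kind recorded during analysis.  The same
     vectorizable_* routines that accepted the statement in analysis mode
     are now called in transform mode (with a gsi and vec_stmt), so each
     is expected to succeed here.  */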
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

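/* E.g. for SCALAR_TYPE int and SIZE 16 on a target with 16-byte vectors
   this yields a V4SImode vector type; with SIZE == 0 the target's
   preferred SIMD mode for the scalar mode is used instead.  */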
static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

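/* E.g. in "a_8 = b_4 + c_6" each of b_4 and c_6 is a simple use when its
   definition is a statement inside the loop (vect_internal_def), is
   loop-invariant (vect_external_def), or is a constant
   (vect_constant_def).  */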
bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
	*dt = vect_external_def;
      else
	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type
      || (stmt
	  && *dt == vect_double_reduction_def
	  && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

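  /* E.g. a char -> int widening is performed as char -> short -> int;
     every step must provide both the lo and hi variant of the unpacking
     operation on the intermediate type, which the optab checks below
     verify.  */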
  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
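
/* Usage sketch (editor's illustration, not in the original source): a
   caller such as vectorizable_conversion in this file queries the
   function roughly like this:

       enum tree_code code1;
       int multi_step_cvt = 0;
       vec<tree> interm_types = vNULL;

       if (supportable_narrowing_operation (NOP_EXPR, vectype_out,
                                            vectype_in, &code1,
                                            &multi_step_cvt, &interm_types))
         {
           (generate MULTI_STEP_CVT levels of CODE1 statements, narrowing
            through the vector types recorded in INTERM_TYPES)
         }

   INTERM_TYPES is only populated on the multi-step path below and is
   released again when no complete sequence is found.  */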

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
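
  /* Editor's note (illustrative, not in the original source): unlike the
     widening case, which needs a pair of codes, one VEC_PACK_* operation
     takes two wide input vectors, narrows each element, and concatenates
     the results into a single vector, so a single code C1 suffices.  */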

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          /* Query the signed variant here; querying OPTAB1 again would
             merely recompute ICODE1 and make the mode check vacuous.  */
          && (icode2 = optab_handler (interm_optab, vec_mode))
             != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }
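
  /* Illustrative note (editor's sketch, not in the original source):
     for, say, double -> unsigned short, only the first step converts
     float to integer; every later step is a plain integer
     VEC_PACK_TRUNC_EXPR that discards the high bits anyway.  Doing that
     first step as a signed conversion therefore yields the same final
     bits for values in range of the unsigned result type, while usually
     being cheaper.  */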

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
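
  /* Illustrative example (editor's sketch, not in the original source):
     for the int->short->char case from the function comment above, with
     128-bit vectors,

         2 x V4SI --VEC_PACK_TRUNC_EXPR--> V8HI
         2 x V8HI --VEC_PACK_TRUNC_EXPR--> V16QI

     the loop records the short vector type in INTERM_TYPES and returns
     true with *MULTI_STEP_CVT == 1.  */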
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}