]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-stmts.c
cfgexpand.c (expand_gimple_stmt_1): Use ops.code.
[thirdparty/gcc.git] / gcc / tree-vect-stmts.c
CommitLineData
/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
ebfd146a 26#include "tm.h"
40e23961
MC
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
ebfd146a 36#include "tree.h"
40e23961 37#include "fold-const.h"
d8a2d370 38#include "stor-layout.h"
ebfd146a 39#include "target.h"
60393bbc 40#include "predict.h"
60393bbc 41#include "hard-reg-set.h"
60393bbc
AM
42#include "function.h"
43#include "dominance.h"
44#include "cfg.h"
ebfd146a 45#include "basic-block.h"
cf835838 46#include "gimple-pretty-print.h"
2fb9a547
AM
47#include "tree-ssa-alias.h"
48#include "internal-fn.h"
49#include "tree-eh.h"
50#include "gimple-expr.h"
51#include "is-a.h"
18f429e2 52#include "gimple.h"
45b0be94 53#include "gimplify.h"
5be5c238 54#include "gimple-iterator.h"
18f429e2 55#include "gimplify-me.h"
442b4905
AM
56#include "gimple-ssa.h"
57#include "tree-cfg.h"
58#include "tree-phinodes.h"
59#include "ssa-iterators.h"
d8a2d370 60#include "stringpool.h"
442b4905 61#include "tree-ssanames.h"
e28030cf 62#include "tree-ssa-loop-manip.h"
ebfd146a 63#include "cfgloop.h"
0136f8f0
AH
64#include "tree-ssa-loop.h"
65#include "tree-scalar-evolution.h"
36566b39
PK
66#include "hashtab.h"
67#include "rtl.h"
68#include "flags.h"
69#include "statistics.h"
70#include "real.h"
71#include "fixed-value.h"
72#include "insn-config.h"
73#include "expmed.h"
74#include "dojump.h"
75#include "explow.h"
76#include "calls.h"
77#include "emit-rtl.h"
78#include "varasm.h"
79#include "stmt.h"
ebfd146a 80#include "expr.h"
7ee2468b 81#include "recog.h" /* FIXME: for insn_data */
b0710fe1 82#include "insn-codes.h"
ebfd146a 83#include "optabs.h"
718f9c0f 84#include "diagnostic-core.h"
ebfd146a 85#include "tree-vectorizer.h"
c582198b
AM
86#include "hash-map.h"
87#include "plugin-api.h"
88#include "ipa-ref.h"
0136f8f0 89#include "cgraph.h"
9b2b7279 90#include "builtins.h"
ebfd146a 91
7ee2468b
SB
92/* For lang_hooks.types.type_for_mode. */
93#include "langhooks.h"
ebfd146a 94
c3e7ee41
BS
95/* Return the vectorized type for the given statement. */
96
97tree
98stmt_vectype (struct _stmt_vec_info *stmt_info)
99{
100 return STMT_VINFO_VECTYPE (stmt_info);
101}
102
103/* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
105bool
106stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
107{
108 gimple stmt = STMT_VINFO_STMT (stmt_info);
109 basic_block bb = gimple_bb (stmt);
110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111 struct loop* loop;
112
113 if (!loop_vinfo)
114 return false;
115
116 loop = LOOP_VINFO_LOOP (loop_vinfo);
117
118 return (bb->loop_father == loop->inner);
119}
120
121/* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
124
125unsigned
92345349 126record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
c3e7ee41 127 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92345349 128 int misalign, enum vect_cost_model_location where)
c3e7ee41 129{
92345349 130 if (body_cost_vec)
c3e7ee41 131 {
92345349
BS
132 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133 add_stmt_info_to_vec (body_cost_vec, count, kind,
134 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 misalign);
c3e7ee41 136 return (unsigned)
92345349 137 (builtin_vectorization_cost (kind, vectype, misalign) * count);
c3e7ee41
BS
138
139 }
140 else
141 {
142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144 void *target_cost_data;
145
146 if (loop_vinfo)
147 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148 else
149 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
150
92345349
BS
151 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 misalign, where);
c3e7ee41
BS
153 }
154}
155
272c6793
RS
156/* Return a variable of type ELEM_TYPE[NELEMS]. */
157
158static tree
159create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
160{
161 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 "vect_array");
163}
164
165/* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
169
170static tree
171read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 tree array, unsigned HOST_WIDE_INT n)
173{
174 tree vect_type, vect, vect_name, array_ref;
175 gimple new_stmt;
176
177 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178 vect_type = TREE_TYPE (TREE_TYPE (array));
179 vect = vect_create_destination_var (scalar_dest, vect_type);
180 array_ref = build4 (ARRAY_REF, vect_type, array,
181 build_int_cst (size_type_node, n),
182 NULL_TREE, NULL_TREE);
183
184 new_stmt = gimple_build_assign (vect, array_ref);
185 vect_name = make_ssa_name (vect, new_stmt);
186 gimple_assign_set_lhs (new_stmt, vect_name);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
188
189 return vect_name;
190}
191
192/* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
195
196static void
197write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 tree array, unsigned HOST_WIDE_INT n)
199{
200 tree array_ref;
201 gimple new_stmt;
202
203 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 build_int_cst (size_type_node, n),
205 NULL_TREE, NULL_TREE);
206
207 new_stmt = gimple_build_assign (array_ref, vect);
208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
209}
210
211/* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
213 (and its group). */
214
215static tree
216create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
217{
272c6793
RS
218 tree mem_ref, alias_ptr_type;
219
220 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222 /* Arrays have the same alignment as their type. */
644ffefd 223 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
272c6793
RS
224 return mem_ref;
225}
226
ebfd146a
IR
227/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
228
229/* Function vect_mark_relevant.
230
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
232
233static void
9771b263 234vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
83197f37
IR
235 enum vect_relevant relevant, bool live_p,
236 bool used_in_pattern)
ebfd146a
IR
237{
238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
83197f37 241 gimple pattern_stmt;
ebfd146a 242
73fbfcad 243 if (dump_enabled_p ())
78c60e3d 244 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 245 "mark relevant %d, live %d.\n", relevant, live_p);
ebfd146a 246
83197f37
IR
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
ebfd146a
IR
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252 {
83197f37
IR
253 bool found = false;
254 if (!used_in_pattern)
255 {
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 gimple use_stmt;
259 tree lhs;
13c931c9
JJ
260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 262
83197f37
IR
263 if (is_gimple_assign (stmt))
264 lhs = gimple_assign_lhs (stmt);
265 else
266 lhs = gimple_call_lhs (stmt);
ebfd146a 267
83197f37
IR
268 /* This use is out of pattern use, if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
270 stmt. */
5ce9450f 271 if (lhs && TREE_CODE (lhs) == SSA_NAME)
ab0ef706
JJ
272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
273 {
274 if (is_gimple_debug (USE_STMT (use_p)))
275 continue;
276 use_stmt = USE_STMT (use_p);
277
13c931c9
JJ
278 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 continue;
280
ab0ef706
JJ
281 if (vinfo_for_stmt (use_stmt)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
283 {
284 found = true;
285 break;
286 }
287 }
83197f37
IR
288 }
289
290 if (!found)
291 {
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
296
297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
298
73fbfcad 299 if (dump_enabled_p ())
78c60e3d
SS
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "last stmt in pattern. don't mark"
e645e942 302 " relevant/live.\n");
83197f37
IR
303 stmt_info = vinfo_for_stmt (pattern_stmt);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307 stmt = pattern_stmt;
308 }
ebfd146a
IR
309 }
310
311 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313 STMT_VINFO_RELEVANT (stmt_info) = relevant;
314
315 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
317 {
73fbfcad 318 if (dump_enabled_p ())
78c60e3d 319 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 320 "already marked relevant/live.\n");
ebfd146a
IR
321 return;
322 }
323
9771b263 324 worklist->safe_push (stmt);
ebfd146a
IR
325}
326
327
328/* Function vect_stmt_relevant_p.
329
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
332
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - control stmts in the loop (except for the exit condition).
337
338 CHECKME: what other side effects would the vectorizer allow? */
339
340static bool
341vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 enum vect_relevant *relevant, bool *live_p)
343{
344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345 ssa_op_iter op_iter;
346 imm_use_iterator imm_iter;
347 use_operand_p use_p;
348 def_operand_p def_p;
349
8644a673 350 *relevant = vect_unused_in_scope;
ebfd146a
IR
351 *live_p = false;
352
353 /* cond stmt other than loop exit cond. */
b8698a0f
L
354 if (is_ctrl_stmt (stmt)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356 != loop_exit_ctrl_vec_info_type)
8644a673 357 *relevant = vect_used_in_scope;
ebfd146a
IR
358
359 /* changing memory. */
360 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
361 if (gimple_vdef (stmt)
362 && !gimple_clobber_p (stmt))
ebfd146a 363 {
73fbfcad 364 if (dump_enabled_p ())
78c60e3d 365 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 366 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 367 *relevant = vect_used_in_scope;
ebfd146a
IR
368 }
369
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
372 {
373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
374 {
375 basic_block bb = gimple_bb (USE_STMT (use_p));
376 if (!flow_bb_inside_loop_p (loop, bb))
377 {
73fbfcad 378 if (dump_enabled_p ())
78c60e3d 379 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 380 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 381
3157b0c2
AO
382 if (is_gimple_debug (USE_STMT (use_p)))
383 continue;
384
ebfd146a
IR
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 gcc_assert (bb == single_exit (loop)->dest);
389
390 *live_p = true;
391 }
392 }
393 }
394
395 return (*live_p || *relevant);
396}
397
398
b8698a0f 399/* Function exist_non_indexing_operands_for_use_p
ebfd146a 400
ff802fa1 401 USE is one of the uses attached to STMT. Check if USE is
ebfd146a
IR
402 used in STMT for anything other than indexing an array. */
403
404static bool
405exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
406{
407 tree operand;
408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
59a05b0c 409
ff802fa1 410 /* USE corresponds to some operand in STMT. If there is no data
ebfd146a
IR
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info))
414 return true;
59a05b0c 415
ebfd146a
IR
416 /* STMT has a data_ref. FORNOW this means that its of one of
417 the following forms:
418 -1- ARRAY_REF = var
419 -2- var = ARRAY_REF
420 (This should have been verified in analyze_data_refs).
421
422 'var' in the second case corresponds to a def, not a use,
b8698a0f 423 so USE cannot correspond to any operands that are not used
ebfd146a
IR
424 for array indexing.
425
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
ebfd146a
IR
428
429 if (!gimple_assign_copy_p (stmt))
5ce9450f
JJ
430 {
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt))
433 switch (gimple_call_internal_fn (stmt))
434 {
435 case IFN_MASK_STORE:
436 operand = gimple_call_arg (stmt, 3);
437 if (operand == use)
438 return true;
439 /* FALLTHRU */
440 case IFN_MASK_LOAD:
441 operand = gimple_call_arg (stmt, 2);
442 if (operand == use)
443 return true;
444 break;
445 default:
446 break;
447 }
448 return false;
449 }
450
59a05b0c
EB
451 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452 return false;
ebfd146a 453 operand = gimple_assign_rhs1 (stmt);
ebfd146a
IR
454 if (TREE_CODE (operand) != SSA_NAME)
455 return false;
456
457 if (operand == use)
458 return true;
459
460 return false;
461}
462
463
b8698a0f 464/*
ebfd146a
IR
465 Function process_use.
466
467 Inputs:
468 - a USE in STMT in a loop represented by LOOP_VINFO
b8698a0f 469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
ff802fa1 470 that defined USE. This is done by calling mark_relevant and passing it
ebfd146a 471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
aec7ae7d
JJ
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473 be performed.
ebfd146a
IR
474
475 Outputs:
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480 Exceptions:
481 - case 1: If USE is used only for address computations (e.g. array indexing),
b8698a0f 482 which does not need to be directly vectorized, then the liveness/relevance
ebfd146a 483 of the respective DEF_STMT is left unchanged.
b8698a0f
L
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT cause it had already been processed.
ebfd146a
IR
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
488
489 Return true if everything is as expected. Return false otherwise. */
490
491static bool
b8698a0f 492process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
9771b263 493 enum vect_relevant relevant, vec<gimple> *worklist,
aec7ae7d 494 bool force)
ebfd146a
IR
495{
496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498 stmt_vec_info dstmt_vinfo;
499 basic_block bb, def_bb;
500 tree def;
501 gimple def_stmt;
502 enum vect_def_type dt;
503
b8698a0f 504 /* case 1: we are only interested in uses that need to be vectorized. Uses
ebfd146a 505 that are used for address computation are not considered relevant. */
aec7ae7d 506 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
ebfd146a
IR
507 return true;
508
24ee1384 509 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
b8698a0f 510 {
73fbfcad 511 if (dump_enabled_p ())
78c60e3d 512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 513 "not vectorized: unsupported use in stmt.\n");
ebfd146a
IR
514 return false;
515 }
516
517 if (!def_stmt || gimple_nop_p (def_stmt))
518 return true;
519
520 def_bb = gimple_bb (def_stmt);
521 if (!flow_bb_inside_loop_p (loop, def_bb))
522 {
73fbfcad 523 if (dump_enabled_p ())
e645e942 524 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
ebfd146a
IR
525 return true;
526 }
527
b8698a0f
L
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
ebfd146a
IR
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo = vinfo_for_stmt (def_stmt);
534 bb = gimple_bb (stmt);
535 if (gimple_code (stmt) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537 && gimple_code (def_stmt) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539 && bb->loop_father == def_bb->loop_father)
540 {
73fbfcad 541 if (dump_enabled_p ())
78c60e3d 542 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 543 "reduc-stmt defining reduc-phi in the same nest.\n");
ebfd146a
IR
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
b8698a0f 547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
8644a673 548 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
ebfd146a
IR
549 return true;
550 }
551
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
554 d = def_stmt
555 inner-loop:
556 stmt # use (d)
557 outer-loop-tail-bb:
558 ... */
559 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
560 {
73fbfcad 561 if (dump_enabled_p ())
78c60e3d 562 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 563 "outer-loop def-stmt defining inner-loop stmt.\n");
7c5222ff 564
ebfd146a
IR
565 switch (relevant)
566 {
8644a673 567 case vect_unused_in_scope:
7c5222ff
IR
568 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 vect_used_in_scope : vect_unused_in_scope;
ebfd146a 570 break;
7c5222ff 571
ebfd146a 572 case vect_used_in_outer_by_reduction:
7c5222ff 573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
ebfd146a
IR
574 relevant = vect_used_by_reduction;
575 break;
7c5222ff 576
ebfd146a 577 case vect_used_in_outer:
7c5222ff 578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
8644a673 579 relevant = vect_used_in_scope;
ebfd146a 580 break;
7c5222ff 581
8644a673 582 case vect_used_in_scope:
ebfd146a
IR
583 break;
584
585 default:
586 gcc_unreachable ();
b8698a0f 587 }
ebfd146a
IR
588 }
589
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
592 ...
593 inner-loop:
594 d = def_stmt
06066f92 595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
ebfd146a
IR
596 stmt # use (d) */
597 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
598 {
73fbfcad 599 if (dump_enabled_p ())
78c60e3d 600 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 601 "inner-loop def-stmt defining outer-loop stmt.\n");
7c5222ff 602
ebfd146a
IR
603 switch (relevant)
604 {
8644a673 605 case vect_unused_in_scope:
b8698a0f 606 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
06066f92 607 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
a70d6342 608 vect_used_in_outer_by_reduction : vect_unused_in_scope;
ebfd146a
IR
609 break;
610
ebfd146a
IR
611 case vect_used_by_reduction:
612 relevant = vect_used_in_outer_by_reduction;
613 break;
614
8644a673 615 case vect_used_in_scope:
ebfd146a
IR
616 relevant = vect_used_in_outer;
617 break;
618
619 default:
620 gcc_unreachable ();
621 }
622 }
623
83197f37
IR
624 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625 is_pattern_stmt_p (stmt_vinfo));
ebfd146a
IR
626 return true;
627}
628
629
630/* Function vect_mark_stmts_to_be_vectorized.
631
632 Not all stmts in the loop need to be vectorized. For example:
633
634 for i...
635 for j...
636 1. T0 = i + j
637 2. T1 = a[T0]
638
639 3. j = j + 1
640
641 Stmt 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
643
644 This pass detects such stmts. */
645
646bool
647vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
648{
ebfd146a
IR
649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651 unsigned int nbbs = loop->num_nodes;
652 gimple_stmt_iterator si;
653 gimple stmt;
654 unsigned int i;
655 stmt_vec_info stmt_vinfo;
656 basic_block bb;
657 gimple phi;
658 bool live_p;
06066f92
IR
659 enum vect_relevant relevant, tmp_relevant;
660 enum vect_def_type def_type;
ebfd146a 661
73fbfcad 662 if (dump_enabled_p ())
78c60e3d 663 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 664 "=== vect_mark_stmts_to_be_vectorized ===\n");
ebfd146a 665
00f96dc9 666 auto_vec<gimple, 64> worklist;
ebfd146a
IR
667
668 /* 1. Init worklist. */
669 for (i = 0; i < nbbs; i++)
670 {
671 bb = bbs[i];
672 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
b8698a0f 673 {
ebfd146a 674 phi = gsi_stmt (si);
73fbfcad 675 if (dump_enabled_p ())
ebfd146a 676 {
78c60e3d
SS
677 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
679 }
680
681 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
83197f37 682 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
ebfd146a
IR
683 }
684 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
685 {
686 stmt = gsi_stmt (si);
73fbfcad 687 if (dump_enabled_p ())
ebfd146a 688 {
78c60e3d
SS
689 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 691 }
ebfd146a
IR
692
693 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
83197f37 694 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
ebfd146a
IR
695 }
696 }
697
698 /* 2. Process_worklist */
9771b263 699 while (worklist.length () > 0)
ebfd146a
IR
700 {
701 use_operand_p use_p;
702 ssa_op_iter iter;
703
9771b263 704 stmt = worklist.pop ();
73fbfcad 705 if (dump_enabled_p ())
ebfd146a 706 {
78c60e3d
SS
707 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
709 }
710
b8698a0f
L
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
ebfd146a
IR
713 liveness and relevance properties of STMT. */
714 stmt_vinfo = vinfo_for_stmt (stmt);
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
717
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
722
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
725 live_p = false
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
b8698a0f 728 those that are used by a reduction computation, and those that are
ff802fa1 729 (also) used by a regular computation. This allows us later on to
b8698a0f 730 identify stmts that are used solely by a reduction, and therefore the
7c5222ff 731 order of the results that they produce does not have to be kept. */
ebfd146a 732
06066f92
IR
733 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734 tmp_relevant = relevant;
735 switch (def_type)
ebfd146a 736 {
06066f92
IR
737 case vect_reduction_def:
738 switch (tmp_relevant)
739 {
740 case vect_unused_in_scope:
741 relevant = vect_used_by_reduction;
742 break;
743
744 case vect_used_by_reduction:
745 if (gimple_code (stmt) == GIMPLE_PHI)
746 break;
747 /* fall through */
748
749 default:
73fbfcad 750 if (dump_enabled_p ())
78c60e3d 751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 752 "unsupported use of reduction.\n");
06066f92
IR
753 return false;
754 }
755
b8698a0f 756 live_p = false;
06066f92 757 break;
b8698a0f 758
06066f92
IR
759 case vect_nested_cycle:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_in_outer_by_reduction
762 && tmp_relevant != vect_used_in_outer)
763 {
73fbfcad 764 if (dump_enabled_p ())
78c60e3d 765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 766 "unsupported use of nested cycle.\n");
7c5222ff 767
06066f92
IR
768 return false;
769 }
7c5222ff 770
b8698a0f
L
771 live_p = false;
772 break;
773
06066f92
IR
774 case vect_double_reduction_def:
775 if (tmp_relevant != vect_unused_in_scope
776 && tmp_relevant != vect_used_by_reduction)
777 {
73fbfcad 778 if (dump_enabled_p ())
78c60e3d 779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 780 "unsupported use of double reduction.\n");
7c5222ff 781
7c5222ff 782 return false;
06066f92
IR
783 }
784
785 live_p = false;
b8698a0f 786 break;
7c5222ff 787
06066f92
IR
788 default:
789 break;
7c5222ff 790 }
b8698a0f 791
aec7ae7d 792 if (is_pattern_stmt_p (stmt_vinfo))
9d5e7640
IR
793 {
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt))
798 {
69d2aade
JJ
799 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800 tree op = gimple_assign_rhs1 (stmt);
801
802 i = 1;
803 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
804 {
805 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
aec7ae7d 806 live_p, relevant, &worklist, false)
69d2aade 807 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
aec7ae7d 808 live_p, relevant, &worklist, false))
566d377a 809 return false;
69d2aade
JJ
810 i = 2;
811 }
812 for (; i < gimple_num_ops (stmt); i++)
9d5e7640 813 {
69d2aade 814 op = gimple_op (stmt, i);
9d5e7640 815 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
aec7ae7d 816 &worklist, false))
07687835 817 return false;
9d5e7640
IR
818 }
819 }
820 else if (is_gimple_call (stmt))
821 {
822 for (i = 0; i < gimple_call_num_args (stmt); i++)
823 {
824 tree arg = gimple_call_arg (stmt, i);
825 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
aec7ae7d 826 &worklist, false))
07687835 827 return false;
9d5e7640
IR
828 }
829 }
830 }
831 else
832 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
833 {
834 tree op = USE_FROM_PTR (use_p);
835 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
aec7ae7d 836 &worklist, false))
07687835 837 return false;
9d5e7640 838 }
aec7ae7d
JJ
839
840 if (STMT_VINFO_GATHER_P (stmt_vinfo))
841 {
842 tree off;
843 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
844 gcc_assert (decl);
845 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
846 &worklist, true))
566d377a 847 return false;
aec7ae7d 848 }
ebfd146a
IR
849 } /* while worklist */
850
ebfd146a
IR
851 return true;
852}
853
854
b8698a0f 855/* Function vect_model_simple_cost.
ebfd146a 856
b8698a0f 857 Models cost for simple operations, i.e. those that only emit ncopies of a
ebfd146a
IR
858 single op. Right now, this does not account for multiple insns that could
859 be generated for the single vector op. We will handle that shortly. */
860
861void
b8698a0f 862vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
92345349
BS
863 enum vect_def_type *dt,
864 stmt_vector_for_cost *prologue_cost_vec,
865 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
866{
867 int i;
92345349 868 int inside_cost = 0, prologue_cost = 0;
ebfd146a
IR
869
870 /* The SLP costs were already calculated during SLP tree build. */
871 if (PURE_SLP_STMT (stmt_info))
872 return;
873
ebfd146a
IR
874 /* FORNOW: Assuming maximum 2 args per stmts. */
875 for (i = 0; i < 2; i++)
92345349
BS
876 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
877 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
878 stmt_info, 0, vect_prologue);
c3e7ee41
BS
879
880 /* Pass the inside-of-loop statements to the target-specific cost model. */
92345349
BS
881 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
882 stmt_info, 0, vect_body);
c3e7ee41 883
73fbfcad 884 if (dump_enabled_p ())
78c60e3d
SS
885 dump_printf_loc (MSG_NOTE, vect_location,
886 "vect_model_simple_cost: inside_cost = %d, "
e645e942 887 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
888}
889
890
8bd37302
BS
891/* Model cost for type demotion and promotion operations. PWR is normally
892 zero for single-step promotions and demotions. It will be one if
893 two-step promotion/demotion is required, and so on. Each additional
894 step doubles the number of instructions required. */
895
896static void
897vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
898 enum vect_def_type *dt, int pwr)
899{
900 int i, tmp;
92345349 901 int inside_cost = 0, prologue_cost = 0;
c3e7ee41
BS
902 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
903 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
904 void *target_cost_data;
8bd37302
BS
905
906 /* The SLP costs were already calculated during SLP tree build. */
907 if (PURE_SLP_STMT (stmt_info))
908 return;
909
c3e7ee41
BS
910 if (loop_vinfo)
911 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
912 else
913 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
914
8bd37302
BS
915 for (i = 0; i < pwr + 1; i++)
916 {
917 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
918 (i + 1) : i;
c3e7ee41 919 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
92345349
BS
920 vec_promote_demote, stmt_info, 0,
921 vect_body);
8bd37302
BS
922 }
923
924 /* FORNOW: Assuming maximum 2 args per stmts. */
925 for (i = 0; i < 2; i++)
92345349
BS
926 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
927 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
928 stmt_info, 0, vect_prologue);
8bd37302 929
73fbfcad 930 if (dump_enabled_p ())
78c60e3d
SS
931 dump_printf_loc (MSG_NOTE, vect_location,
932 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 933 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
934}
935
0d0293ac 936/* Function vect_cost_group_size
b8698a0f 937
0d0293ac 938 For grouped load or store, return the group_size only if it is the first
ebfd146a
IR
939 load or store of a group, else return 1. This ensures that group size is
940 only returned once per group. */
941
942static int
0d0293ac 943vect_cost_group_size (stmt_vec_info stmt_info)
ebfd146a 944{
e14c1050 945 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a
IR
946
947 if (first_stmt == STMT_VINFO_STMT (stmt_info))
e14c1050 948 return GROUP_SIZE (stmt_info);
ebfd146a
IR
949
950 return 1;
951}
952
953
954/* Function vect_model_store_cost
955
0d0293ac
MM
956 Models cost for stores. In the case of grouped accesses, one access
957 has the overhead of the grouped access attributed to it. */
ebfd146a
IR
958
959void
b8698a0f 960vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
272c6793 961 bool store_lanes_p, enum vect_def_type dt,
92345349
BS
962 slp_tree slp_node,
963 stmt_vector_for_cost *prologue_cost_vec,
964 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
965{
966 int group_size;
92345349 967 unsigned int inside_cost = 0, prologue_cost = 0;
720f5239
IR
968 struct data_reference *first_dr;
969 gimple first_stmt;
ebfd146a
IR
970
971 /* The SLP costs were already calculated during SLP tree build. */
972 if (PURE_SLP_STMT (stmt_info))
973 return;
974
8644a673 975 if (dt == vect_constant_def || dt == vect_external_def)
92345349
BS
976 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
977 stmt_info, 0, vect_prologue);
ebfd146a 978
0d0293ac
MM
979 /* Grouped access? */
980 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
720f5239
IR
981 {
982 if (slp_node)
983 {
9771b263 984 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
720f5239
IR
985 group_size = 1;
986 }
987 else
988 {
e14c1050 989 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
0d0293ac 990 group_size = vect_cost_group_size (stmt_info);
720f5239
IR
991 }
992
993 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
994 }
0d0293ac 995 /* Not a grouped access. */
ebfd146a 996 else
720f5239
IR
997 {
998 group_size = 1;
999 first_dr = STMT_VINFO_DATA_REF (stmt_info);
1000 }
ebfd146a 1001
272c6793 1002 /* We assume that the cost of a single store-lanes instruction is
0d0293ac 1003 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
272c6793
RS
1004 access is instead being provided by a permute-and-store operation,
1005 include the cost of the permutes. */
1006 if (!store_lanes_p && group_size > 1)
ebfd146a 1007 {
e1377713
ES
1008 /* Uses a high and low interleave or shuffle operations for each
1009 needed permute. */
1010 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
92345349
BS
1011 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1012 stmt_info, 0, vect_body);
ebfd146a 1013
73fbfcad 1014 if (dump_enabled_p ())
78c60e3d 1015 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1016 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 1017 group_size);
ebfd146a
IR
1018 }
1019
1020 /* Costs of the stores. */
92345349 1021 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
ebfd146a 1022
73fbfcad 1023 if (dump_enabled_p ())
78c60e3d
SS
1024 dump_printf_loc (MSG_NOTE, vect_location,
1025 "vect_model_store_cost: inside_cost = %d, "
e645e942 1026 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
1027}
1028
1029
720f5239
IR
1030/* Calculate cost of DR's memory access. */
1031void
1032vect_get_store_cost (struct data_reference *dr, int ncopies,
c3e7ee41 1033 unsigned int *inside_cost,
92345349 1034 stmt_vector_for_cost *body_cost_vec)
720f5239
IR
1035{
1036 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
c3e7ee41
BS
1037 gimple stmt = DR_STMT (dr);
1038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
1039
1040 switch (alignment_support_scheme)
1041 {
1042 case dr_aligned:
1043 {
92345349
BS
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1045 vector_store, stmt_info, 0,
1046 vect_body);
720f5239 1047
73fbfcad 1048 if (dump_enabled_p ())
78c60e3d 1049 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1050 "vect_model_store_cost: aligned.\n");
720f5239
IR
1051 break;
1052 }
1053
1054 case dr_unaligned_supported:
1055 {
720f5239 1056 /* Here, we assign an additional cost for the unaligned store. */
92345349 1057 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1058 unaligned_store, stmt_info,
92345349 1059 DR_MISALIGNMENT (dr), vect_body);
73fbfcad 1060 if (dump_enabled_p ())
78c60e3d
SS
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: unaligned supported by "
e645e942 1063 "hardware.\n");
720f5239
IR
1064 break;
1065 }
1066
38eec4c6
UW
1067 case dr_unaligned_unsupported:
1068 {
1069 *inside_cost = VECT_MAX_COST;
1070
73fbfcad 1071 if (dump_enabled_p ())
78c60e3d 1072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1073 "vect_model_store_cost: unsupported access.\n");
38eec4c6
UW
1074 break;
1075 }
1076
720f5239
IR
1077 default:
1078 gcc_unreachable ();
1079 }
1080}
1081
1082
ebfd146a
IR
1083/* Function vect_model_load_cost
1084
0d0293ac
MM
1085 Models cost for loads. In the case of grouped accesses, the last access
1086 has the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1087 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1088 access scheme chosen. */
1089
1090void
92345349
BS
1091vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1092 bool load_lanes_p, slp_tree slp_node,
1093 stmt_vector_for_cost *prologue_cost_vec,
1094 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
1095{
1096 int group_size;
ebfd146a
IR
1097 gimple first_stmt;
1098 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
92345349 1099 unsigned int inside_cost = 0, prologue_cost = 0;
ebfd146a
IR
1100
1101 /* The SLP costs were already calculated during SLP tree build. */
1102 if (PURE_SLP_STMT (stmt_info))
1103 return;
1104
0d0293ac 1105 /* Grouped accesses? */
e14c1050 1106 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
0d0293ac 1107 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
ebfd146a 1108 {
0d0293ac 1109 group_size = vect_cost_group_size (stmt_info);
ebfd146a
IR
1110 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1111 }
0d0293ac 1112 /* Not a grouped access. */
ebfd146a
IR
1113 else
1114 {
1115 group_size = 1;
1116 first_dr = dr;
1117 }
1118
272c6793 1119 /* We assume that the cost of a single load-lanes instruction is
0d0293ac 1120 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
272c6793
RS
1121 access is instead being provided by a load-and-permute operation,
1122 include the cost of the permutes. */
1123 if (!load_lanes_p && group_size > 1)
ebfd146a 1124 {
2c23db6d
ES
1125 /* Uses an even and odd extract operations or shuffle operations
1126 for each needed permute. */
1127 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1128 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1129 stmt_info, 0, vect_body);
ebfd146a 1130
73fbfcad 1131 if (dump_enabled_p ())
e645e942
TJ
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1134 group_size);
ebfd146a
IR
1135 }
1136
1137 /* The loads themselves. */
a82960aa
RG
1138 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1139 {
a21892ad
BS
1140 /* N scalar loads plus gathering them into a vector. */
1141 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
92345349 1142 inside_cost += record_stmt_cost (body_cost_vec,
c3e7ee41 1143 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
92345349
BS
1144 scalar_load, stmt_info, 0, vect_body);
1145 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1146 stmt_info, 0, vect_body);
a82960aa
RG
1147 }
1148 else
1149 vect_get_load_cost (first_dr, ncopies,
1150 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1151 || group_size > 1 || slp_node),
92345349
BS
1152 &inside_cost, &prologue_cost,
1153 prologue_cost_vec, body_cost_vec, true);
720f5239 1154
73fbfcad 1155 if (dump_enabled_p ())
78c60e3d
SS
1156 dump_printf_loc (MSG_NOTE, vect_location,
1157 "vect_model_load_cost: inside_cost = %d, "
e645e942 1158 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1159}
1160
1161
1162/* Calculate cost of DR's memory access. */
1163void
1164vect_get_load_cost (struct data_reference *dr, int ncopies,
c3e7ee41 1165 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1166 unsigned int *prologue_cost,
1167 stmt_vector_for_cost *prologue_cost_vec,
1168 stmt_vector_for_cost *body_cost_vec,
1169 bool record_prologue_costs)
720f5239
IR
1170{
1171 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
c3e7ee41
BS
1172 gimple stmt = DR_STMT (dr);
1173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
1174
1175 switch (alignment_support_scheme)
ebfd146a
IR
1176 {
1177 case dr_aligned:
1178 {
92345349
BS
1179 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1180 stmt_info, 0, vect_body);
ebfd146a 1181
73fbfcad 1182 if (dump_enabled_p ())
78c60e3d 1183 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1184 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1185
1186 break;
1187 }
1188 case dr_unaligned_supported:
1189 {
720f5239 1190 /* Here, we assign an additional cost for the unaligned load. */
92345349 1191 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1192 unaligned_load, stmt_info,
92345349 1193 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1194
73fbfcad 1195 if (dump_enabled_p ())
78c60e3d
SS
1196 dump_printf_loc (MSG_NOTE, vect_location,
1197 "vect_model_load_cost: unaligned supported by "
e645e942 1198 "hardware.\n");
ebfd146a
IR
1199
1200 break;
1201 }
1202 case dr_explicit_realign:
1203 {
92345349
BS
1204 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1205 vector_load, stmt_info, 0, vect_body);
1206 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1207 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1208
1209 /* FIXME: If the misalignment remains fixed across the iterations of
1210 the containing loop, the following cost should be added to the
92345349 1211 prologue costs. */
ebfd146a 1212 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1213 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1214 stmt_info, 0, vect_body);
ebfd146a 1215
73fbfcad 1216 if (dump_enabled_p ())
e645e942
TJ
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: explicit realign\n");
8bd37302 1219
ebfd146a
IR
1220 break;
1221 }
1222 case dr_explicit_realign_optimized:
1223 {
73fbfcad 1224 if (dump_enabled_p ())
e645e942 1225 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1226 "vect_model_load_cost: unaligned software "
e645e942 1227 "pipelined.\n");
ebfd146a
IR
1228
1229 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1230 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1231 if this is an access in a group of loads, which provide grouped
ebfd146a 1232 access, then the above cost should only be considered for one
ff802fa1 1233 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1234 and a realignment op. */
1235
92345349 1236 if (add_realign_cost && record_prologue_costs)
ebfd146a 1237 {
92345349
BS
1238 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1239 vector_stmt, stmt_info,
1240 0, vect_prologue);
ebfd146a 1241 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1242 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1243 vector_stmt, stmt_info,
1244 0, vect_prologue);
ebfd146a
IR
1245 }
1246
92345349
BS
1247 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1248 stmt_info, 0, vect_body);
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1250 stmt_info, 0, vect_body);
8bd37302 1251
73fbfcad 1252 if (dump_enabled_p ())
78c60e3d 1253 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1254 "vect_model_load_cost: explicit realign optimized"
1255 "\n");
8bd37302 1256
ebfd146a
IR
1257 break;
1258 }
1259
38eec4c6
UW
1260 case dr_unaligned_unsupported:
1261 {
1262 *inside_cost = VECT_MAX_COST;
1263
73fbfcad 1264 if (dump_enabled_p ())
78c60e3d 1265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1266 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1267 break;
1268 }
1269
ebfd146a
IR
1270 default:
1271 gcc_unreachable ();
1272 }
ebfd146a
IR
1273}
1274
418b7df3
RG
1275/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1276 the loop preheader for the vectorized stmt STMT. */
ebfd146a 1277
418b7df3
RG
1278static void
1279vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
ebfd146a 1280{
ebfd146a 1281 if (gsi)
418b7df3 1282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1283 else
1284 {
418b7df3 1285 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a 1286 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
b8698a0f 1287
a70d6342
IR
1288 if (loop_vinfo)
1289 {
1290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
418b7df3
RG
1291 basic_block new_bb;
1292 edge pe;
a70d6342
IR
1293
1294 if (nested_in_vect_loop_p (loop, stmt))
1295 loop = loop->inner;
b8698a0f 1296
a70d6342 1297 pe = loop_preheader_edge (loop);
418b7df3 1298 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
a70d6342
IR
1299 gcc_assert (!new_bb);
1300 }
1301 else
1302 {
1303 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1304 basic_block bb;
1305 gimple_stmt_iterator gsi_bb_start;
1306
1307 gcc_assert (bb_vinfo);
1308 bb = BB_VINFO_BB (bb_vinfo);
12aaf609 1309 gsi_bb_start = gsi_after_labels (bb);
418b7df3 1310 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
a70d6342 1311 }
ebfd146a
IR
1312 }
1313
73fbfcad 1314 if (dump_enabled_p ())
ebfd146a 1315 {
78c60e3d
SS
1316 dump_printf_loc (MSG_NOTE, vect_location,
1317 "created new init_stmt: ");
1318 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
ebfd146a 1319 }
418b7df3
RG
1320}
1321
1322/* Function vect_init_vector.
ebfd146a 1323
5467ee52
RG
1324 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1325 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1326 vector type a vector with all elements equal to VAL is created first.
1327 Place the initialization at BSI if it is not NULL. Otherwise, place the
1328 initialization at the loop preheader.
418b7df3
RG
1329 Return the DEF of INIT_STMT.
1330 It will be used in the vectorization of STMT. */
1331
1332tree
5467ee52 1333vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
418b7df3
RG
1334{
1335 tree new_var;
1336 gimple init_stmt;
1337 tree vec_oprnd;
1338 tree new_temp;
1339
5467ee52
RG
1340 if (TREE_CODE (type) == VECTOR_TYPE
1341 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
418b7df3 1342 {
5467ee52 1343 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
418b7df3 1344 {
5467ee52
RG
1345 if (CONSTANT_CLASS_P (val))
1346 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
418b7df3
RG
1347 else
1348 {
b731b390 1349 new_temp = make_ssa_name (TREE_TYPE (type));
0d0e4a03 1350 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
418b7df3 1351 vect_init_vector_1 (stmt, init_stmt, gsi);
5467ee52 1352 val = new_temp;
418b7df3
RG
1353 }
1354 }
5467ee52 1355 val = build_vector_from_val (type, val);
418b7df3
RG
1356 }
1357
5467ee52 1358 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
5467ee52 1359 init_stmt = gimple_build_assign (new_var, val);
418b7df3
RG
1360 new_temp = make_ssa_name (new_var, init_stmt);
1361 gimple_assign_set_lhs (init_stmt, new_temp);
1362 vect_init_vector_1 (stmt, init_stmt, gsi);
ebfd146a
IR
1363 vec_oprnd = gimple_assign_lhs (init_stmt);
1364 return vec_oprnd;
1365}
1366
a70d6342 1367
ebfd146a
IR
1368/* Function vect_get_vec_def_for_operand.
1369
ff802fa1 1370 OP is an operand in STMT. This function returns a (vector) def that will be
ebfd146a
IR
1371 used in the vectorized stmt for STMT.
1372
1373 In the case that OP is an SSA_NAME which is defined in the loop, then
1374 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1375
1376 In case OP is an invariant or constant, a new stmt that creates a vector def
1377 needs to be introduced. */
1378
1379tree
1380vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1381{
1382 tree vec_oprnd;
1383 gimple vec_stmt;
1384 gimple def_stmt;
1385 stmt_vec_info def_stmt_info = NULL;
1386 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
9dc3f7de 1387 unsigned int nunits;
ebfd146a 1388 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
ebfd146a 1389 tree def;
ebfd146a
IR
1390 enum vect_def_type dt;
1391 bool is_simple_use;
1392 tree vector_type;
1393
73fbfcad 1394 if (dump_enabled_p ())
ebfd146a 1395 {
78c60e3d
SS
1396 dump_printf_loc (MSG_NOTE, vect_location,
1397 "vect_get_vec_def_for_operand: ");
1398 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
e645e942 1399 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1400 }
1401
24ee1384
IR
1402 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1403 &def_stmt, &def, &dt);
ebfd146a 1404 gcc_assert (is_simple_use);
73fbfcad 1405 if (dump_enabled_p ())
ebfd146a 1406 {
78c60e3d 1407 int loc_printed = 0;
ebfd146a
IR
1408 if (def)
1409 {
78c60e3d
SS
1410 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1411 loc_printed = 1;
1412 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
e645e942 1413 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1414 }
1415 if (def_stmt)
1416 {
78c60e3d
SS
1417 if (loc_printed)
1418 dump_printf (MSG_NOTE, " def_stmt = ");
1419 else
1420 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1421 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
ebfd146a
IR
1422 }
1423 }
1424
1425 switch (dt)
1426 {
1427 /* Case 1: operand is a constant. */
1428 case vect_constant_def:
1429 {
7569a6cc
RG
1430 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1431 gcc_assert (vector_type);
9dc3f7de 1432 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
7569a6cc 1433
b8698a0f 1434 if (scalar_def)
ebfd146a
IR
1435 *scalar_def = op;
1436
1437 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
73fbfcad 1438 if (dump_enabled_p ())
78c60e3d 1439 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1440 "Create vector_cst. nunits = %d\n", nunits);
ebfd146a 1441
418b7df3 1442 return vect_init_vector (stmt, op, vector_type, NULL);
ebfd146a
IR
1443 }
1444
1445 /* Case 2: operand is defined outside the loop - loop invariant. */
8644a673 1446 case vect_external_def:
ebfd146a
IR
1447 {
1448 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1449 gcc_assert (vector_type);
ebfd146a 1450
b8698a0f 1451 if (scalar_def)
ebfd146a
IR
1452 *scalar_def = def;
1453
1454 /* Create 'vec_inv = {inv,inv,..,inv}' */
73fbfcad 1455 if (dump_enabled_p ())
e645e942 1456 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
ebfd146a 1457
418b7df3 1458 return vect_init_vector (stmt, def, vector_type, NULL);
ebfd146a
IR
1459 }
1460
1461 /* Case 3: operand is defined inside the loop. */
8644a673 1462 case vect_internal_def:
ebfd146a 1463 {
b8698a0f 1464 if (scalar_def)
ebfd146a
IR
1465 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1466
1467 /* Get the def from the vectorized stmt. */
1468 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1469
ebfd146a 1470 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1471 /* Get vectorized pattern statement. */
1472 if (!vec_stmt
1473 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1474 && !STMT_VINFO_RELEVANT (def_stmt_info))
1475 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1476 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1477 gcc_assert (vec_stmt);
1478 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1479 vec_oprnd = PHI_RESULT (vec_stmt);
1480 else if (is_gimple_call (vec_stmt))
1481 vec_oprnd = gimple_call_lhs (vec_stmt);
1482 else
1483 vec_oprnd = gimple_assign_lhs (vec_stmt);
1484 return vec_oprnd;
1485 }
1486
1487 /* Case 4: operand is defined by a loop header phi - reduction */
1488 case vect_reduction_def:
06066f92 1489 case vect_double_reduction_def:
7c5222ff 1490 case vect_nested_cycle:
ebfd146a
IR
1491 {
1492 struct loop *loop;
1493
1494 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
b8698a0f 1495 loop = (gimple_bb (def_stmt))->loop_father;
ebfd146a
IR
1496
1497 /* Get the def before the loop */
1498 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1499 return get_initial_def_for_reduction (stmt, op, scalar_def);
1500 }
1501
1502 /* Case 5: operand is defined by loop-header phi - induction. */
1503 case vect_induction_def:
1504 {
1505 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1506
1507 /* Get the def from the vectorized stmt. */
1508 def_stmt_info = vinfo_for_stmt (def_stmt);
1509 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1510 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1511 vec_oprnd = PHI_RESULT (vec_stmt);
1512 else
1513 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1514 return vec_oprnd;
1515 }
1516
1517 default:
1518 gcc_unreachable ();
1519 }
1520}
1521
1522
1523/* Function vect_get_vec_def_for_stmt_copy
1524
ff802fa1 1525 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1526 vectorized stmt to be created (by the caller to this function) is a "copy"
1527 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1528 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1529 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1530 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1531 DT is the type of the vector def VEC_OPRND.
1532
1533 Context:
1534 In case the vectorization factor (VF) is bigger than the number
1535 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1536 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1537 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1538 smallest data-type determines the VF, and as a result, when vectorizing
1539 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1540 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1541 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1542 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1543 which VF=16 and nunits=4, so the number of copies required is 4):
1544
1545 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1546
ebfd146a
IR
1547 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1548 VS1.1: vx.1 = memref1 VS1.2
1549 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1550 VS1.3: vx.3 = memref3
ebfd146a
IR
1551
1552 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1553 VSnew.1: vz1 = vx.1 + ... VSnew.2
1554 VSnew.2: vz2 = vx.2 + ... VSnew.3
1555 VSnew.3: vz3 = vx.3 + ...
1556
1557 The vectorization of S1 is explained in vectorizable_load.
1558 The vectorization of S2:
b8698a0f
L
1559 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1560 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1561 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1562 returns the vector-def 'vx.0'.
1563
b8698a0f
L
1564 To create the remaining copies of the vector-stmt (VSnew.j), this
1565 function is called to get the relevant vector-def for each operand. It is
1566 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1567 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1568
b8698a0f
L
1569 For example, to obtain the vector-def 'vx.1' in order to create the
1570 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1571 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1572 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1573 and return its def ('vx.1').
1574 Overall, to create the above sequence this function will be called 3 times:
1575 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1576 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1577 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1578
1579tree
1580vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1581{
1582 gimple vec_stmt_for_operand;
1583 stmt_vec_info def_stmt_info;
1584
1585 /* Do nothing; can reuse same def. */
8644a673 1586 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1587 return vec_oprnd;
1588
1589 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1590 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1591 gcc_assert (def_stmt_info);
1592 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1593 gcc_assert (vec_stmt_for_operand);
1594 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1595 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1596 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1597 else
1598 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1599 return vec_oprnd;
1600}
1601
1602
1603/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1604 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a
IR
1605
1606static void
b8698a0f 1607vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1608 vec<tree> *vec_oprnds0,
1609 vec<tree> *vec_oprnds1)
ebfd146a 1610{
9771b263 1611 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1612
1613 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1614 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1615
9771b263 1616 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1617 {
9771b263 1618 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1619 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1620 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1621 }
1622}
1623
1624
d092494c
IR
1625/* Get vectorized definitions for OP0 and OP1.
1626 REDUC_INDEX is the index of reduction operand in case of reduction,
1627 and -1 otherwise. */
ebfd146a 1628
d092494c 1629void
ebfd146a 1630vect_get_vec_defs (tree op0, tree op1, gimple stmt,
9771b263
DN
1631 vec<tree> *vec_oprnds0,
1632 vec<tree> *vec_oprnds1,
d092494c 1633 slp_tree slp_node, int reduc_index)
ebfd146a
IR
1634{
1635 if (slp_node)
d092494c
IR
1636 {
1637 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1638 auto_vec<tree> ops (nops);
1639 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1640
9771b263 1641 ops.quick_push (op0);
d092494c 1642 if (op1)
9771b263 1643 ops.quick_push (op1);
d092494c
IR
1644
1645 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1646
37b5ec8f 1647 *vec_oprnds0 = vec_defs[0];
d092494c 1648 if (op1)
37b5ec8f 1649 *vec_oprnds1 = vec_defs[1];
d092494c 1650 }
ebfd146a
IR
1651 else
1652 {
1653 tree vec_oprnd;
1654
9771b263 1655 vec_oprnds0->create (1);
b8698a0f 1656 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1657 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1658
1659 if (op1)
1660 {
9771b263 1661 vec_oprnds1->create (1);
b8698a0f 1662 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1663 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1664 }
1665 }
1666}
1667
1668
1669/* Function vect_finish_stmt_generation.
1670
1671 Insert a new stmt. */
1672
1673void
1674vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1675 gimple_stmt_iterator *gsi)
1676{
1677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1679 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a
IR
1680
1681 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1682
54e8e2c3
RG
1683 if (!gsi_end_p (*gsi)
1684 && gimple_has_mem_ops (vec_stmt))
1685 {
1686 gimple at_stmt = gsi_stmt (*gsi);
1687 tree vuse = gimple_vuse (at_stmt);
1688 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1689 {
1690 tree vdef = gimple_vdef (at_stmt);
1691 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1692 /* If we have an SSA vuse and insert a store, update virtual
1693 SSA form to avoid triggering the renamer. Do so only
1694 if we can easily see all uses - which is what almost always
1695 happens with the way vectorized stmts are inserted. */
1696 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1697 && ((is_gimple_assign (vec_stmt)
1698 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1699 || (is_gimple_call (vec_stmt)
1700 && !(gimple_call_flags (vec_stmt)
1701 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1702 {
1703 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1704 gimple_set_vdef (vec_stmt, new_vdef);
1705 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1706 }
1707 }
1708 }
ebfd146a
IR
1709 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1710
b8698a0f 1711 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1712 bb_vinfo));
ebfd146a 1713
73fbfcad 1714 if (dump_enabled_p ())
ebfd146a 1715 {
78c60e3d
SS
1716 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1717 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1718 }
1719
ad885386 1720 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1721
1722 /* While EH edges will generally prevent vectorization, stmt might
1723 e.g. be in a must-not-throw region. Ensure newly created stmts
1724 that could throw are part of the same region. */
1725 int lp_nr = lookup_stmt_eh_lp (stmt);
1726 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1727 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1728}
1729
1730/* Checks if CALL can be vectorized in type VECTYPE. Returns
1731 a function declaration if the target has a vectorized version
1732 of the function, or NULL_TREE if the function cannot be vectorized. */
1733
1734tree
538dd0b7 1735vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
ebfd146a
IR
1736{
1737 tree fndecl = gimple_call_fndecl (call);
ebfd146a
IR
1738
1739 /* We only handle functions that do not read or clobber memory -- i.e.
1740 const or novops ones. */
1741 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1742 return NULL_TREE;
1743
1744 if (!fndecl
1745 || TREE_CODE (fndecl) != FUNCTION_DECL
1746 || !DECL_BUILT_IN (fndecl))
1747 return NULL_TREE;
1748
62f7fd21 1749 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
ebfd146a
IR
1750 vectype_in);
1751}
1752
5ce9450f
JJ
1753
1754static tree permute_vec_elements (tree, tree, tree, gimple,
1755 gimple_stmt_iterator *);
1756
1757
1758/* Function vectorizable_mask_load_store.
1759
1760 Check if STMT performs a conditional load or store that can be vectorized.
1761 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1762 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1763 Return FALSE if not a vectorizable STMT, TRUE otherwise.

 STMT is treated as an IFN_MASK_STORE when its internal function is
 IFN_MASK_STORE and as a masked load otherwise (vectorizable_call
 dispatches here only for IFN_MASK_LOAD and IFN_MASK_STORE).
 SLP is not handled: FALSE is returned whenever SLP_NODE is non-NULL.
 Masked gathers are emitted through the builtin returned by
 vect_check_gather. After a load has been transformed, the scalar
 call is replaced by an assignment of zero to its lhs. */
1764
1765static bool
1766vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1767 gimple *vec_stmt, slp_tree slp_node)
1768{
1769 tree vec_dest = NULL;
1770 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1771 stmt_vec_info prev_stmt_info;
1772 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1775 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1776 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1777 tree elem_type;
1778 gimple new_stmt;
1779 tree dummy;
1780 tree dataref_ptr = NULL_TREE;
1781 gimple ptr_incr;
1782 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1783 int ncopies;
1784 int i, j;
1785 bool inv_p;
1786 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1787 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1788 int gather_scale = 1;
1789 enum vect_def_type gather_dt = vect_unknown_def_type;
1790 bool is_store;
1791 tree mask;
1792 gimple def_stmt;
1793 tree def;
1794 enum vect_def_type dt;
1795
 /* SLP is not supported for masked loads and stores. */
1796 if (slp_node != NULL)
1797 return false;
1798
 /* Number of vector stmts needed to cover one vectorized iteration. */
1799 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1800 gcc_assert (ncopies >= 1);
1801
 /* Argument 2 of the internal call is the mask. Its scalar precision
 must equal the bit size of the vector element mode. */
1802 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1803 mask = gimple_call_arg (stmt, 2);
1804 if (TYPE_PRECISION (TREE_TYPE (mask))
1805 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1806 return false;
1807
1808 /* FORNOW. This restriction should be relaxed. */
1809 if (nested_in_vect_loop && ncopies > 1)
1810 {
1811 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 "multiple types in nested loop.");
1814 return false;
1815 }
1816
1817 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1818 return false;
1819
1820 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1821 return false;
1822
1823 if (!STMT_VINFO_DATA_REF (stmt_info))
1824 return false;
1825
1826 elem_type = TREE_TYPE (vectype);
1827
 /* Grouped and strided accesses are rejected. */
1828 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1829 return false;
1830
1831 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1832 return false;
1833
1834 if (STMT_VINFO_GATHER_P (stmt_info))
1835 {
 /* These two locals shadow the function-level DEF_STMT and DEF. */
1836 gimple def_stmt;
1837 tree def;
1838 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1839 &gather_off, &gather_scale);
1840 gcc_assert (gather_decl);
1841 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1842 &def_stmt, &def, &gather_dt,
1843 &gather_off_vectype))
1844 {
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "gather index use not simple.");
1848 return false;
1849 }
03b9e8e4
JJ
1850
 /* The mask is the fourth parameter of the gather builtin; a builtin
 whose mask parameter has INTEGER_TYPE is not supported here. */
1851 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852 tree masktype
1853 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1854 if (TREE_CODE (masktype) == INTEGER_TYPE)
1855 {
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 "masked gather with integer mask not supported.");
1859 return false;
1860 }
5ce9450f
JJ
1861 }
 /* Non-gather accesses with a zero or negative step are rejected. */
1862 else if (tree_int_cst_compare (nested_in_vect_loop
1863 ? STMT_VINFO_DR_STEP (stmt_info)
1864 : DR_STEP (dr), size_zero_node) <= 0)
1865 return false;
 /* Otherwise VECTYPE must have a vector mode for which
 can_vec_mask_load_store_p accepts a masked load/store. */
1866 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1867 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1868 return false;
1869
1870 if (TREE_CODE (mask) != SSA_NAME)
1871 return false;
1872
1873 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1874 &def_stmt, &def, &dt))
1875 return false;
1876
1877 if (is_store)
1878 {
 /* Argument 3 of IFN_MASK_STORE is the value being stored. */
1879 tree rhs = gimple_call_arg (stmt, 3);
1880 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1881 &def_stmt, &def, &dt))
1882 return false;
1883 }
1884
1885 if (!vec_stmt) /* transformation not required. */
1886 {
 /* Analysis only: record the stmt kind and account for the cost. */
1887 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1888 if (is_store)
1889 vect_model_store_cost (stmt_info, ncopies, false, dt,
1890 NULL, NULL, NULL);
1891 else
1892 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1893 return true;
1894 }
1895
1896 /** Transform. **/
1897
1898 if (STMT_VINFO_GATHER_P (stmt_info))
1899 {
1900 tree vec_oprnd0 = NULL_TREE, op;
1901 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1902 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 1903 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 1904 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 1905 tree mask_perm_mask = NULL_TREE;
5ce9450f
JJ
1906 edge pe = loop_preheader_edge (loop);
1907 gimple_seq seq;
1908 basic_block new_bb;
1909 enum { NARROW, NONE, WIDEN } modifier;
1910 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1911
acdcd61b
JJ
 /* Pick apart the builtin's prototype: the parameters are, in order,
 source, pointer, index, mask and scale. */
1912 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1913 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917 scaletype = TREE_VALUE (arglist);
1918 gcc_checking_assert (types_compatible_p (srctype, rettype)
1919 && types_compatible_p (srctype, masktype));
1920
5ce9450f
JJ
 /* Classify the relation between the number of data elements (NUNITS)
 and the number of offset elements (GATHER_OFF_NUNITS). */
1921 if (nunits == gather_off_nunits)
1922 modifier = NONE;
1923 else if (nunits == gather_off_nunits / 2)
1924 {
1925 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1926 modifier = WIDEN;
1927
 /* The offset vector has twice as many elements as the data vector.
 PERM_MASK is applied to the offset vector on odd copies.
 NOTE(review): for power-of-two NUNITS every selector index picks
 from the upper half of the offsets -- confirm. */
1928 for (i = 0; i < gather_off_nunits; ++i)
1929 sel[i] = i | nunits;
1930
557be5a8 1931 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5ce9450f
JJ
1932 }
1933 else if (nunits == gather_off_nunits * 2)
1934 {
1935 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1936 modifier = NARROW;
1937
 /* The data vector has twice as many elements as the offset vector.
 PERM_MASK combines the results of two consecutive gathers; the
 number of copies is doubled accordingly. */
1938 for (i = 0; i < nunits; ++i)
1939 sel[i] = i < gather_off_nunits
1940 ? i : i + nunits - gather_off_nunits;
1941
557be5a8 1942 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5ce9450f 1943 ncopies *= 2;
acdcd61b
JJ
 /* MASK_PERM_MASK is applied to the mask operand on odd copies. */
1944 for (i = 0; i < nunits; ++i)
1945 sel[i] = i | gather_off_nunits;
557be5a8 1946 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
5ce9450f
JJ
1947 }
1948 else
1949 gcc_unreachable ();
1950
5ce9450f
JJ
1951 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1952
 /* If the converted base is not a gimple invariant, gimplify it and
 insert the resulting statements on the loop preheader edge. */
1953 ptr = fold_convert (ptrtype, gather_base);
1954 if (!is_gimple_min_invariant (ptr))
1955 {
1956 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1957 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1958 gcc_assert (!new_bb);
1959 }
1960
1961 scale = build_int_cst (scaletype, gather_scale);
1962
1963 prev_stmt_info = NULL;
1964 for (j = 0; j < ncopies; ++j)
1965 {
 /* Obtain the offset operand for this copy. With WIDEN, odd copies
 reuse the previous offset vector through PERM_MASK. */
1966 if (modifier == WIDEN && (j & 1))
1967 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1968 perm_mask, stmt, gsi);
1969 else if (j == 0)
1970 op = vec_oprnd0
1971 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1972 else
1973 op = vec_oprnd0
1974 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1975
 /* View-convert the offsets to the builtin's index type if needed. */
1976 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1977 {
1978 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1979 == TYPE_VECTOR_SUBPARTS (idxtype));
1980 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
b731b390 1981 var = make_ssa_name (var);
5ce9450f
JJ
1982 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1983 new_stmt
0d0e4a03 1984 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
1985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 op = var;
1987 }
1988
acdcd61b
JJ
 /* Obtain the mask operand. When MASK_PERM_MASK is set (NARROW), odd
 copies permute the mask operand of the previous copy. */
1989 if (mask_perm_mask && (j & 1))
1990 mask_op = permute_vec_elements (mask_op, mask_op,
1991 mask_perm_mask, stmt, gsi);
5ce9450f
JJ
1992 else
1993 {
acdcd61b
JJ
1994 if (j == 0)
1995 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1996 else
1997 {
1998 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1999 &def_stmt, &def, &dt);
2000 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2001 }
5ce9450f 2002
acdcd61b
JJ
2003 mask_op = vec_mask;
2004 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005 {
2006 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2007 == TYPE_VECTOR_SUBPARTS (masktype));
2008 var = vect_get_new_vect_var (masktype, vect_simple_var,
2009 NULL);
b731b390 2010 var = make_ssa_name (var);
acdcd61b
JJ
2011 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2012 new_stmt
0d0e4a03 2013 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
acdcd61b
JJ
2014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015 mask_op = var;
2016 }
5ce9450f
JJ
2017 }
2018
 /* MASK_OP is passed both as the first (SRCTYPE) argument and as the
 mask argument; the checking assert above requires SRCTYPE and
 MASKTYPE to be compatible. */
2019 new_stmt
2020 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2021 scale);
2022
 /* If the builtin's return type differs from VECTYPE, emit the call
 into a temporary and view-convert it into VEC_DEST. */
2023 if (!useless_type_conversion_p (vectype, rettype))
2024 {
2025 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2026 == TYPE_VECTOR_SUBPARTS (rettype));
2027 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2028 op = make_ssa_name (var, new_stmt);
2029 gimple_call_set_lhs (new_stmt, op);
2030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 2031 var = make_ssa_name (vec_dest);
5ce9450f 2032 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
0d0e4a03 2033 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2034 }
2035 else
2036 {
2037 var = make_ssa_name (vec_dest, new_stmt);
2038 gimple_call_set_lhs (new_stmt, var);
2039 }
2040
2041 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042
 /* With NARROW, even copies only stash their result; odd copies
 combine it with their own through PERM_MASK. */
2043 if (modifier == NARROW)
2044 {
2045 if ((j & 1) == 0)
2046 {
2047 prev_res = var;
2048 continue;
2049 }
2050 var = permute_vec_elements (prev_res, var,
2051 perm_mask, stmt, gsi);
2052 new_stmt = SSA_NAME_DEF_STMT (var);
2053 }
2054
 /* Chain the vector stmts through STMT_VINFO_RELATED_STMT. */
2055 if (prev_stmt_info == NULL)
2056 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2057 else
2058 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2059 prev_stmt_info = vinfo_for_stmt (new_stmt);
2060 }
3efe2e2c
JJ
2061
2062 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2063 from the IL. */
2064 tree lhs = gimple_call_lhs (stmt);
2065 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2066 set_vinfo_for_stmt (new_stmt, stmt_info);
2067 set_vinfo_for_stmt (stmt, NULL);
2068 STMT_VINFO_STMT (stmt_info) = new_stmt;
2069 gsi_replace (gsi, new_stmt, true);
5ce9450f
JJ
2070 return true;
2071 }
2072 else if (is_store)
2073 {
2074 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2075 prev_stmt_info = NULL;
2076 for (i = 0; i < ncopies; i++)
2077 {
2078 unsigned align, misalign;
2079
2080 if (i == 0)
2081 {
 /* First copy: get the vector defs of the stored value and of the
 mask, and create the data-ref pointer. */
2082 tree rhs = gimple_call_arg (stmt, 3);
2083 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2084 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2085 /* We should have caught mismatched types earlier. */
2086 gcc_assert (useless_type_conversion_p (vectype,
2087 TREE_TYPE (vec_rhs)));
2088 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2089 NULL_TREE, &dummy, gsi,
2090 &ptr_incr, false, &inv_p);
2091 gcc_assert (!inv_p);
2092 }
2093 else
2094 {
 /* Later copies: advance to the next vector defs and bump the
 pointer by the size of one vector. */
2095 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2096 &def, &dt);
2097 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2098 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2099 &def, &dt);
2100 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2101 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2102 TYPE_SIZE_UNIT (vectype));
2103 }
2104
 /* Record the alignment of DATAREF_PTR. When DR_MISALIGNMENT is -1
 only element alignment is assumed (NOTE(review): -1 presumably
 means the misalignment is unknown -- confirm). */
2105 align = TYPE_ALIGN_UNIT (vectype);
2106 if (aligned_access_p (dr))
2107 misalign = 0;
2108 else if (DR_MISALIGNMENT (dr) == -1)
2109 {
2110 align = TYPE_ALIGN_UNIT (elem_type);
2111 misalign = 0;
2112 }
2113 else
2114 misalign = DR_MISALIGNMENT (dr);
2115 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2116 misalign);
 /* Argument 1 of the scalar call is passed through unchanged. */
2117 new_stmt
2118 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2119 gimple_call_arg (stmt, 1),
2120 vec_mask, vec_rhs);
2121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122 if (i == 0)
2123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124 else
2125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126 prev_stmt_info = vinfo_for_stmt (new_stmt);
2127 }
2128 }
2129 else
2130 {
2131 tree vec_mask = NULL_TREE;
2132 prev_stmt_info = NULL;
2133 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2134 for (i = 0; i < ncopies; i++)
2135 {
2136 unsigned align, misalign;
2137
2138 if (i == 0)
2139 {
 /* First copy: get the mask's vector def and create the data-ref
 pointer. */
2140 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2141 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2142 NULL_TREE, &dummy, gsi,
2143 &ptr_incr, false, &inv_p);
2144 gcc_assert (!inv_p);
2145 }
2146 else
2147 {
 /* Later copies: advance to the next mask def and bump the pointer
 by the size of one vector. */
2148 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2149 &def, &dt);
2150 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2151 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2152 TYPE_SIZE_UNIT (vectype));
2153 }
2154
 /* Same alignment bookkeeping as in the store case above. */
2155 align = TYPE_ALIGN_UNIT (vectype);
2156 if (aligned_access_p (dr))
2157 misalign = 0;
2158 else if (DR_MISALIGNMENT (dr) == -1)
2159 {
2160 align = TYPE_ALIGN_UNIT (elem_type);
2161 misalign = 0;
2162 }
2163 else
2164 misalign = DR_MISALIGNMENT (dr);
2165 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2166 misalign);
2167 new_stmt
2168 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2169 gimple_call_arg (stmt, 1),
2170 vec_mask);
b731b390 2171 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
5ce9450f
JJ
2172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2173 if (i == 0)
2174 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2175 else
2176 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2177 prev_stmt_info = vinfo_for_stmt (new_stmt);
2178 }
2179 }
2180
3efe2e2c
JJ
2181 if (!is_store)
2182 {
2183 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2184 from the IL. */
2185 tree lhs = gimple_call_lhs (stmt);
2186 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2187 set_vinfo_for_stmt (new_stmt, stmt_info);
2188 set_vinfo_for_stmt (stmt, NULL);
2189 STMT_VINFO_STMT (stmt_info) = new_stmt;
2190 gsi_replace (gsi, new_stmt, true);
2191 }
2192
5ce9450f
JJ
2193 return true;
2194}
2195
2196
ebfd146a
IR
2197/* Function vectorizable_call.
2198
538dd0b7 2199 Check if GS performs a function call that can be vectorized.
b8698a0f 2200 If VEC_STMT is also passed, vectorize the call: create a vectorized
ebfd146a
IR
2201 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2202 Return FALSE if GS is not a vectorizable call, TRUE otherwise.

 IFN_MASK_LOAD and IFN_MASK_STORE calls are forwarded to
 vectorizable_mask_load_store. Any other call needs an SSA_NAME lhs,
 one to three arguments of compatible types, and a vectorized builtin
 supplied by the target (see vectorizable_function). The one exception
 is IFN_GOMP_SIMD_LANE, which is expanded into a constant vector.
 When SLP_NODE is non-NULL the vector stmts are pushed onto
 SLP_TREE_VEC_STMTS (SLP_NODE) and the scalar call is left in place. */
2203
2204static bool
538dd0b7 2205vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
190c2236 2206 slp_tree slp_node)
ebfd146a 2207{
538dd0b7 2208 gcall *stmt;
ebfd146a
IR
2209 tree vec_dest;
2210 tree scalar_dest;
2211 tree op, type;
2212 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2213 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a
IR
2214 tree vectype_out, vectype_in;
2215 int nunits_in;
2216 int nunits_out;
2217 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2218 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 2219 tree fndecl, new_temp, def, rhs_type;
ebfd146a 2220 gimple def_stmt;
0502fb85
UB
2221 enum vect_def_type dt[3]
2222 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 2223 gimple new_stmt = NULL;
ebfd146a 2224 int ncopies, j;
6e1aa848 2225 vec<tree> vargs = vNULL;
ebfd146a
IR
2226 enum { NARROW, NONE, WIDEN } modifier;
2227 size_t i, nargs;
9d5e7640 2228 tree lhs;
ebfd146a 2229
190c2236 2230 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2231 return false;
2232
8644a673 2233 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2234 return false;
2235
538dd0b7
DM
2236 /* Is GS a vectorizable call? */
2237 stmt = dyn_cast <gcall *> (gs);
2238 if (!stmt)
ebfd146a
IR
2239 return false;
2240
5ce9450f
JJ
 /* Masked loads and stores have their own handler. */
2241 if (gimple_call_internal_p (stmt)
2242 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2243 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2244 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2245 slp_node);
2246
0136f8f0
AH
 /* The call must define an SSA name. */
2247 if (gimple_call_lhs (stmt) == NULL_TREE
2248 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2249 return false;
2250
0136f8f0 2251 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2252
b690cc0f
RG
2253 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254
ebfd146a
IR
2255 /* Process function arguments. */
2256 rhs_type = NULL_TREE;
b690cc0f 2257 vectype_in = NULL_TREE;
ebfd146a
IR
2258 nargs = gimple_call_num_args (stmt);
2259
1b1562a5
MM
2260 /* Bail out if the function has more than three arguments, we do not have
2261 interesting builtin functions to vectorize with more than two arguments
2262 except for fma. No arguments is also not good. */
2263 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2264 return false;
2265
74bf76ed
JJ
2266 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2267 if (gimple_call_internal_p (stmt)
2268 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269 {
2270 nargs = 0;
2271 rhs_type = unsigned_type_node;
2272 }
2273
ebfd146a
IR
2274 for (i = 0; i < nargs; i++)
2275 {
b690cc0f
RG
2276 tree opvectype;
2277
ebfd146a
IR
2278 op = gimple_call_arg (stmt, i);
2279
2280 /* We can only handle calls with arguments of the same type. */
2281 if (rhs_type
8533c9d8 2282 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2283 {
73fbfcad 2284 if (dump_enabled_p ())
78c60e3d 2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2286 "argument types differ.\n");
ebfd146a
IR
2287 return false;
2288 }
b690cc0f
RG
2289 if (!rhs_type)
2290 rhs_type = TREE_TYPE (op);
ebfd146a 2291
24ee1384 2292 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 2293 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 2294 {
73fbfcad 2295 if (dump_enabled_p ())
78c60e3d 2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2297 "use not simple.\n");
ebfd146a
IR
2298 return false;
2299 }
ebfd146a 2300
b690cc0f
RG
 /* All arguments that have a vector type must share the same one. */
2301 if (!vectype_in)
2302 vectype_in = opvectype;
2303 else if (opvectype
2304 && opvectype != vectype_in)
2305 {
73fbfcad 2306 if (dump_enabled_p ())
78c60e3d 2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2308 "argument vector types differ.\n");
b690cc0f
RG
2309 return false;
2310 }
2311 }
2312 /* If all arguments are external or constant defs use a vector type with
2313 the same size as the output vector type. */
ebfd146a 2314 if (!vectype_in)
b690cc0f 2315 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
 /* A missing input vector type is only tolerated during analysis. */
2316 if (vec_stmt)
2317 gcc_assert (vectype_in);
2318 if (!vectype_in)
2319 {
73fbfcad 2320 if (dump_enabled_p ())
7d8930a0 2321 {
78c60e3d
SS
2322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2323 "no vectype for scalar type ");
2324 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2325 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2326 }
2327
2328 return false;
2329 }
ebfd146a
IR
2330
2331 /* FORNOW: only input and output vectors whose element counts are equal
 or differ by a factor of two are handled. NARROW means the output
 vector has twice as many elements as the input vector, WIDEN half
 as many. */
b690cc0f
RG
2332 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2333 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
2334 if (nunits_in == nunits_out / 2)
2335 modifier = NARROW;
2336 else if (nunits_out == nunits_in)
2337 modifier = NONE;
2338 else if (nunits_out == nunits_in / 2)
2339 modifier = WIDEN;
2340 else
2341 return false;
2342
2343 /* For now, we only vectorize functions if a target specific builtin
2344 is available. TODO -- in some cases, it might be profitable to
2345 insert the calls for pieces of the vector, in order to be able
2346 to vectorize other operations in the loop. */
2347 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2348 if (fndecl == NULL_TREE)
2349 {
74bf76ed
JJ
2350 if (gimple_call_internal_p (stmt)
2351 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2352 && !slp_node
2353 && loop_vinfo
2354 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2355 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2356 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2357 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2358 {
2359 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2360 { 0, 1, 2, ... vf - 1 } vector. */
2361 gcc_assert (nargs == 0);
2362 }
2363 else
2364 {
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2367 "function is not vectorizable.\n");
74bf76ed
JJ
2368 return false;
2369 }
ebfd146a
IR
2370 }
2371
5006671f 2372 gcc_assert (!gimple_vuse (stmt));
ebfd146a 2373
190c2236
JJ
 /* One copy for SLP; otherwise the vectorization factor divided by the
 element count of the output vector (NARROW) or of the input vector. */
2374 if (slp_node || PURE_SLP_STMT (stmt_info))
2375 ncopies = 1;
2376 else if (modifier == NARROW)
ebfd146a
IR
2377 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2378 else
2379 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380
2381 /* Sanity check: make sure that at least one copy of the vectorized stmt
2382 needs to be generated. */
2383 gcc_assert (ncopies >= 1);
2384
2385 if (!vec_stmt) /* transformation not required. */
2386 {
2387 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2388 if (dump_enabled_p ())
e645e942
TJ
2389 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2390 "\n");
c3e7ee41 2391 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
2392 return true;
2393 }
2394
2395 /** Transform. **/
2396
73fbfcad 2397 if (dump_enabled_p ())
e645e942 2398 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2399
2400 /* Handle def. */
2401 scalar_dest = gimple_call_lhs (stmt);
2402 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2403
2404 prev_stmt_info = NULL;
2405 switch (modifier)
2406 {
2407 case NONE:
2408 for (j = 0; j < ncopies; ++j)
2409 {
2410 /* Build argument list for the vectorized call. */
2411 if (j == 0)
9771b263 2412 vargs.create (nargs);
ebfd146a 2413 else
9771b263 2414 vargs.truncate (0);
ebfd146a 2415
190c2236
JJ
2416 if (slp_node)
2417 {
ef062b13 2418 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2419 vec<tree> vec_oprnds0;
190c2236
JJ
2420
 /* VARGS temporarily holds the scalar arguments; its slots are
 overwritten with vector defs below. */
2421 for (i = 0; i < nargs; i++)
9771b263 2422 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2423 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2424 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2425
2426 /* Arguments are ready. Create the new vector stmt. */
9771b263 2427 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2428 {
2429 size_t k;
2430 for (k = 0; k < nargs; k++)
2431 {
37b5ec8f 2432 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2433 vargs[k] = vec_oprndsk[i];
190c2236
JJ
2434 }
2435 new_stmt = gimple_build_call_vec (fndecl, vargs);
2436 new_temp = make_ssa_name (vec_dest, new_stmt);
2437 gimple_call_set_lhs (new_stmt, new_temp);
2438 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2440 }
2441
 /* Release the per-argument vectors of defs. */
2442 for (i = 0; i < nargs; i++)
2443 {
37b5ec8f 2444 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2445 vec_oprndsi.release ();
190c2236 2446 }
190c2236
JJ
2447 continue;
2448 }
2449
ebfd146a
IR
2450 for (i = 0; i < nargs; i++)
2451 {
2452 op = gimple_call_arg (stmt, i);
2453 if (j == 0)
2454 vec_oprnd0
2455 = vect_get_vec_def_for_operand (op, stmt, NULL);
2456 else
63827fb8
IR
2457 {
 /* Continue from the operand used by the previous copy. */
2458 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2459 vec_oprnd0
2460 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2461 }
ebfd146a 2462
9771b263 2463 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2464 }
2465
74bf76ed
JJ
2466 if (gimple_call_internal_p (stmt)
2467 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2468 {
 /* Build the constant vector { j * nunits_out + k } for copy J,
 initialize a new variable with it through vect_init_vector_1,
 and copy that into the destination. */
2469 tree *v = XALLOCAVEC (tree, nunits_out);
2470 int k;
2471 for (k = 0; k < nunits_out; ++k)
2472 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2473 tree cst = build_vector (vectype_out, v);
2474 tree new_var
2475 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2476 gimple init_stmt = gimple_build_assign (new_var, cst);
2477 new_temp = make_ssa_name (new_var, init_stmt);
2478 gimple_assign_set_lhs (init_stmt, new_temp);
2479 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2480 new_temp = make_ssa_name (vec_dest);
74bf76ed
JJ
2481 new_stmt = gimple_build_assign (new_temp,
2482 gimple_assign_lhs (init_stmt));
2483 }
2484 else
2485 {
2486 new_stmt = gimple_build_call_vec (fndecl, vargs);
2487 new_temp = make_ssa_name (vec_dest, new_stmt);
2488 gimple_call_set_lhs (new_stmt, new_temp);
2489 }
ebfd146a
IR
2490 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491
 /* Chain the copies through STMT_VINFO_RELATED_STMT. */
2492 if (j == 0)
2493 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2494 else
2495 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2496
2497 prev_stmt_info = vinfo_for_stmt (new_stmt);
2498 }
2499
2500 break;
2501
2502 case NARROW:
 /* Each vectorized call takes two vector operands per scalar
 argument. */
2503 for (j = 0; j < ncopies; ++j)
2504 {
2505 /* Build argument list for the vectorized call. */
2506 if (j == 0)
9771b263 2507 vargs.create (nargs * 2);
ebfd146a 2508 else
9771b263 2509 vargs.truncate (0);
ebfd146a 2510
190c2236
JJ
2511 if (slp_node)
2512 {
ef062b13 2513 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2514 vec<tree> vec_oprnds0;
190c2236
JJ
2515
2516 for (i = 0; i < nargs; i++)
9771b263 2517 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2518 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2519 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2520
2521 /* Arguments are ready. Create the new vector stmt. */
9771b263 2522 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
2523 {
2524 size_t k;
9771b263 2525 vargs.truncate (0);
190c2236
JJ
 /* Consume the defs pairwise: elements I and I + 1 of each
 argument's def vector feed one call. */
2526 for (k = 0; k < nargs; k++)
2527 {
37b5ec8f 2528 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
2529 vargs.quick_push (vec_oprndsk[i]);
2530 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236
JJ
2531 }
2532 new_stmt = gimple_build_call_vec (fndecl, vargs);
2533 new_temp = make_ssa_name (vec_dest, new_stmt);
2534 gimple_call_set_lhs (new_stmt, new_temp);
2535 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2536 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2537 }
2538
2539 for (i = 0; i < nargs; i++)
2540 {
37b5ec8f 2541 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2542 vec_oprndsi.release ();
190c2236 2543 }
190c2236
JJ
2544 continue;
2545 }
2546
ebfd146a
IR
2547 for (i = 0; i < nargs; i++)
2548 {
2549 op = gimple_call_arg (stmt, i);
2550 if (j == 0)
2551 {
2552 vec_oprnd0
2553 = vect_get_vec_def_for_operand (op, stmt, NULL);
2554 vec_oprnd1
63827fb8 2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2556 }
2557 else
2558 {
 /* Argument 2*I + 1 of the previous call is the second of the two
 vector defs used for scalar argument I; continue from it. */
336ecb65 2559 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2560 vec_oprnd0
63827fb8 2561 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2562 vec_oprnd1
63827fb8 2563 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2564 }
2565
9771b263
DN
2566 vargs.quick_push (vec_oprnd0);
2567 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
2568 }
2569
2570 new_stmt = gimple_build_call_vec (fndecl, vargs);
2571 new_temp = make_ssa_name (vec_dest, new_stmt);
2572 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
2573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2574
2575 if (j == 0)
2576 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2577 else
2578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2579
2580 prev_stmt_info = vinfo_for_stmt (new_stmt);
2581 }
2582
2583 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2584
2585 break;
2586
2587 case WIDEN:
2588 /* No current target implements this case. */
2589 return false;
2590 }
2591
9771b263 2592 vargs.release ();
ebfd146a 2593
ebfd146a
IR
2594 /* The call in STMT might prevent it from being removed in dce.
2595 We however cannot remove it here, due to the way the ssa name
2596 it defines is mapped to the new definition. So just replace
2597 rhs of the statement with something harmless. */
2598
dd34c087
JJ
 /* With SLP the scalar call is left untouched here. */
2599 if (slp_node)
2600 return true;
2601
ebfd146a 2602 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
 /* For a pattern stmt, use the lhs of the stmt it is related to. */
2603 if (is_pattern_stmt_p (stmt_info))
2604 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2605 else
2606 lhs = gimple_call_lhs (stmt);
2607 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2608 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2609 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
2610 STMT_VINFO_STMT (stmt_info) = new_stmt;
2611 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
2612
2613 return true;
2614}
2615
2616
0136f8f0
AH
/* Information about one argument of a call, as collected by
 vectorizable_simd_clone_call. */
2617struct simd_call_arg_info
2618{
 /* Vector type reported by vect_is_simple_use_1 for the argument;
 asserted to be NULL_TREE for constant and external defs. */
2619 tree vectype;
 /* Base value of a linear argument (the IV base, or the base saved in
 STMT_VINFO_SIMD_CLONE_INFO); NULL_TREE otherwise. */
2620 tree op;
 /* Definition type reported by vect_is_simple_use_1. */
2621 enum vect_def_type dt;
 /* Step of a linear argument; 0 when the argument is not linear. */
2622 HOST_WIDE_INT linear_step;
 /* For constant or external pointer arguments, get_pointer_alignment
 divided by BITS_PER_UNIT; 0 otherwise. */
2623 unsigned int align;
2624};
2625
2626/* Function vectorizable_simd_clone_call.
2627
2628 Check if STMT performs a function call that can be vectorized
2629 by calling a simd clone of the function.
2630 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2631 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2632 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2633
2634static bool
2635vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2636 gimple *vec_stmt, slp_tree slp_node)
2637{
2638 tree vec_dest;
2639 tree scalar_dest;
2640 tree op, type;
2641 tree vec_oprnd0 = NULL_TREE;
2642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2643 tree vectype;
2644 unsigned int nunits;
2645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2647 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2648 tree fndecl, new_temp, def;
2649 gimple def_stmt;
2650 gimple new_stmt = NULL;
2651 int ncopies, j;
2652 vec<simd_call_arg_info> arginfo = vNULL;
2653 vec<tree> vargs = vNULL;
2654 size_t i, nargs;
2655 tree lhs, rtype, ratype;
2656 vec<constructor_elt, va_gc> *ret_ctor_elts;
2657
2658 /* Is STMT a vectorizable call? */
2659 if (!is_gimple_call (stmt))
2660 return false;
2661
2662 fndecl = gimple_call_fndecl (stmt);
2663 if (fndecl == NULL_TREE)
2664 return false;
2665
d52f5295 2666 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
2667 if (node == NULL || node->simd_clones == NULL)
2668 return false;
2669
2670 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2671 return false;
2672
2673 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2674 return false;
2675
2676 if (gimple_call_lhs (stmt)
2677 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2678 return false;
2679
2680 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681
2682 vectype = STMT_VINFO_VECTYPE (stmt_info);
2683
2684 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2685 return false;
2686
2687 /* FORNOW */
2688 if (slp_node || PURE_SLP_STMT (stmt_info))
2689 return false;
2690
2691 /* Process function arguments. */
2692 nargs = gimple_call_num_args (stmt);
2693
2694 /* Bail out if the function has zero arguments. */
2695 if (nargs == 0)
2696 return false;
2697
2698 arginfo.create (nargs);
2699
2700 for (i = 0; i < nargs; i++)
2701 {
2702 simd_call_arg_info thisarginfo;
2703 affine_iv iv;
2704
2705 thisarginfo.linear_step = 0;
2706 thisarginfo.align = 0;
2707 thisarginfo.op = NULL_TREE;
2708
2709 op = gimple_call_arg (stmt, i);
2710 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2711 &def_stmt, &def, &thisarginfo.dt,
2712 &thisarginfo.vectype)
2713 || thisarginfo.dt == vect_uninitialized_def)
2714 {
2715 if (dump_enabled_p ())
2716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717 "use not simple.\n");
2718 arginfo.release ();
2719 return false;
2720 }
2721
2722 if (thisarginfo.dt == vect_constant_def
2723 || thisarginfo.dt == vect_external_def)
2724 gcc_assert (thisarginfo.vectype == NULL_TREE);
2725 else
2726 gcc_assert (thisarginfo.vectype != NULL_TREE);
2727
6c9e85fb
JJ
2728 /* For linear arguments, the analyze phase should have saved
2729 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2730 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2731 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732 {
2733 gcc_assert (vec_stmt);
2734 thisarginfo.linear_step
2735 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2736 thisarginfo.op
2737 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2738 /* If loop has been peeled for alignment, we need to adjust it. */
2739 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2740 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2741 if (n1 != n2)
2742 {
2743 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2744 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2745 tree opt = TREE_TYPE (thisarginfo.op);
2746 bias = fold_convert (TREE_TYPE (step), bias);
2747 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2748 thisarginfo.op
2749 = fold_build2 (POINTER_TYPE_P (opt)
2750 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2751 thisarginfo.op, bias);
2752 }
2753 }
2754 else if (!vec_stmt
2755 && thisarginfo.dt != vect_constant_def
2756 && thisarginfo.dt != vect_external_def
2757 && loop_vinfo
2758 && TREE_CODE (op) == SSA_NAME
2759 && simple_iv (loop, loop_containing_stmt (stmt), op,
2760 &iv, false)
2761 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
2762 {
2763 thisarginfo.linear_step = tree_to_shwi (iv.step);
2764 thisarginfo.op = iv.base;
2765 }
2766 else if ((thisarginfo.dt == vect_constant_def
2767 || thisarginfo.dt == vect_external_def)
2768 && POINTER_TYPE_P (TREE_TYPE (op)))
2769 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2770
2771 arginfo.quick_push (thisarginfo);
2772 }
2773
2774 unsigned int badness = 0;
2775 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
2776 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2777 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
2778 else
2779 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2780 n = n->simdclone->next_clone)
2781 {
2782 unsigned int this_badness = 0;
2783 if (n->simdclone->simdlen
2784 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2785 || n->simdclone->nargs != nargs)
2786 continue;
2787 if (n->simdclone->simdlen
2788 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2789 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790 - exact_log2 (n->simdclone->simdlen)) * 1024;
2791 if (n->simdclone->inbranch)
2792 this_badness += 2048;
2793 int target_badness = targetm.simd_clone.usable (n);
2794 if (target_badness < 0)
2795 continue;
2796 this_badness += target_badness * 512;
2797 /* FORNOW: Have to add code to add the mask argument. */
2798 if (n->simdclone->inbranch)
2799 continue;
2800 for (i = 0; i < nargs; i++)
2801 {
2802 switch (n->simdclone->args[i].arg_type)
2803 {
2804 case SIMD_CLONE_ARG_TYPE_VECTOR:
2805 if (!useless_type_conversion_p
2806 (n->simdclone->args[i].orig_type,
2807 TREE_TYPE (gimple_call_arg (stmt, i))))
2808 i = -1;
2809 else if (arginfo[i].dt == vect_constant_def
2810 || arginfo[i].dt == vect_external_def
2811 || arginfo[i].linear_step)
2812 this_badness += 64;
2813 break;
2814 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2815 if (arginfo[i].dt != vect_constant_def
2816 && arginfo[i].dt != vect_external_def)
2817 i = -1;
2818 break;
2819 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2820 if (arginfo[i].dt == vect_constant_def
2821 || arginfo[i].dt == vect_external_def
2822 || (arginfo[i].linear_step
2823 != n->simdclone->args[i].linear_step))
2824 i = -1;
2825 break;
2826 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2827 /* FORNOW */
2828 i = -1;
2829 break;
2830 case SIMD_CLONE_ARG_TYPE_MASK:
2831 gcc_unreachable ();
2832 }
2833 if (i == (size_t) -1)
2834 break;
2835 if (n->simdclone->args[i].alignment > arginfo[i].align)
2836 {
2837 i = -1;
2838 break;
2839 }
2840 if (arginfo[i].align)
2841 this_badness += (exact_log2 (arginfo[i].align)
2842 - exact_log2 (n->simdclone->args[i].alignment));
2843 }
2844 if (i == (size_t) -1)
2845 continue;
2846 if (bestn == NULL || this_badness < badness)
2847 {
2848 bestn = n;
2849 badness = this_badness;
2850 }
2851 }
2852
2853 if (bestn == NULL)
2854 {
2855 arginfo.release ();
2856 return false;
2857 }
2858
2859 for (i = 0; i < nargs; i++)
2860 if ((arginfo[i].dt == vect_constant_def
2861 || arginfo[i].dt == vect_external_def)
2862 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2863 {
2864 arginfo[i].vectype
2865 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2866 i)));
2867 if (arginfo[i].vectype == NULL
2868 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2869 > bestn->simdclone->simdlen))
2870 {
2871 arginfo.release ();
2872 return false;
2873 }
2874 }
2875
2876 fndecl = bestn->decl;
2877 nunits = bestn->simdclone->simdlen;
2878 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2879
2880 /* If the function isn't const, only allow it in simd loops where user
2881 has asserted that at least nunits consecutive iterations can be
2882 performed using SIMD instructions. */
2883 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2884 && gimple_vuse (stmt))
2885 {
2886 arginfo.release ();
2887 return false;
2888 }
2889
2890 /* Sanity check: make sure that at least one copy of the vectorized stmt
2891 needs to be generated. */
2892 gcc_assert (ncopies >= 1);
2893
2894 if (!vec_stmt) /* transformation not required. */
2895 {
6c9e85fb
JJ
2896 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2897 for (i = 0; i < nargs; i++)
2898 if (bestn->simdclone->args[i].arg_type
2899 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2900 {
2901 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2902 + 1);
2903 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2904 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2905 ? size_type_node : TREE_TYPE (arginfo[i].op);
2906 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2907 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2908 }
0136f8f0
AH
2909 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2910 if (dump_enabled_p ())
2911 dump_printf_loc (MSG_NOTE, vect_location,
2912 "=== vectorizable_simd_clone_call ===\n");
2913/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2914 arginfo.release ();
2915 return true;
2916 }
2917
2918 /** Transform. **/
2919
2920 if (dump_enabled_p ())
2921 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2922
2923 /* Handle def. */
2924 scalar_dest = gimple_call_lhs (stmt);
2925 vec_dest = NULL_TREE;
2926 rtype = NULL_TREE;
2927 ratype = NULL_TREE;
2928 if (scalar_dest)
2929 {
2930 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2931 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2932 if (TREE_CODE (rtype) == ARRAY_TYPE)
2933 {
2934 ratype = rtype;
2935 rtype = TREE_TYPE (ratype);
2936 }
2937 }
2938
2939 prev_stmt_info = NULL;
2940 for (j = 0; j < ncopies; ++j)
2941 {
2942 /* Build argument list for the vectorized call. */
2943 if (j == 0)
2944 vargs.create (nargs);
2945 else
2946 vargs.truncate (0);
2947
2948 for (i = 0; i < nargs; i++)
2949 {
2950 unsigned int k, l, m, o;
2951 tree atype;
2952 op = gimple_call_arg (stmt, i);
2953 switch (bestn->simdclone->args[i].arg_type)
2954 {
2955 case SIMD_CLONE_ARG_TYPE_VECTOR:
2956 atype = bestn->simdclone->args[i].vector_type;
2957 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2958 for (m = j * o; m < (j + 1) * o; m++)
2959 {
2960 if (TYPE_VECTOR_SUBPARTS (atype)
2961 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2962 {
2963 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2964 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2965 / TYPE_VECTOR_SUBPARTS (atype));
2966 gcc_assert ((k & (k - 1)) == 0);
2967 if (m == 0)
2968 vec_oprnd0
2969 = vect_get_vec_def_for_operand (op, stmt, NULL);
2970 else
2971 {
2972 vec_oprnd0 = arginfo[i].op;
2973 if ((m & (k - 1)) == 0)
2974 vec_oprnd0
2975 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2976 vec_oprnd0);
2977 }
2978 arginfo[i].op = vec_oprnd0;
2979 vec_oprnd0
2980 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2981 size_int (prec),
2982 bitsize_int ((m & (k - 1)) * prec));
2983 new_stmt
b731b390 2984 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
2985 vec_oprnd0);
2986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987 vargs.safe_push (gimple_assign_lhs (new_stmt));
2988 }
2989 else
2990 {
2991 k = (TYPE_VECTOR_SUBPARTS (atype)
2992 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2993 gcc_assert ((k & (k - 1)) == 0);
2994 vec<constructor_elt, va_gc> *ctor_elts;
2995 if (k != 1)
2996 vec_alloc (ctor_elts, k);
2997 else
2998 ctor_elts = NULL;
2999 for (l = 0; l < k; l++)
3000 {
3001 if (m == 0 && l == 0)
3002 vec_oprnd0
3003 = vect_get_vec_def_for_operand (op, stmt, NULL);
3004 else
3005 vec_oprnd0
3006 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3007 arginfo[i].op);
3008 arginfo[i].op = vec_oprnd0;
3009 if (k == 1)
3010 break;
3011 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3012 vec_oprnd0);
3013 }
3014 if (k == 1)
3015 vargs.safe_push (vec_oprnd0);
3016 else
3017 {
3018 vec_oprnd0 = build_constructor (atype, ctor_elts);
3019 new_stmt
b731b390 3020 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3021 vec_oprnd0);
3022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3023 vargs.safe_push (gimple_assign_lhs (new_stmt));
3024 }
3025 }
3026 }
3027 break;
3028 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3029 vargs.safe_push (op);
3030 break;
3031 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3032 if (j == 0)
3033 {
3034 gimple_seq stmts;
3035 arginfo[i].op
3036 = force_gimple_operand (arginfo[i].op, &stmts, true,
3037 NULL_TREE);
3038 if (stmts != NULL)
3039 {
3040 basic_block new_bb;
3041 edge pe = loop_preheader_edge (loop);
3042 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3043 gcc_assert (!new_bb);
3044 }
b731b390 3045 tree phi_res = copy_ssa_name (op);
538dd0b7 3046 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0
AH
3047 set_vinfo_for_stmt (new_phi,
3048 new_stmt_vec_info (new_phi, loop_vinfo,
3049 NULL));
3050 add_phi_arg (new_phi, arginfo[i].op,
3051 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3052 enum tree_code code
3053 = POINTER_TYPE_P (TREE_TYPE (op))
3054 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3055 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3056 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3057 widest_int cst
3058 = wi::mul (bestn->simdclone->args[i].linear_step,
3059 ncopies * nunits);
3060 tree tcst = wide_int_to_tree (type, cst);
b731b390 3061 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3062 new_stmt
3063 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3064 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3065 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3066 set_vinfo_for_stmt (new_stmt,
3067 new_stmt_vec_info (new_stmt, loop_vinfo,
3068 NULL));
3069 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3070 UNKNOWN_LOCATION);
3071 arginfo[i].op = phi_res;
3072 vargs.safe_push (phi_res);
3073 }
3074 else
3075 {
3076 enum tree_code code
3077 = POINTER_TYPE_P (TREE_TYPE (op))
3078 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3079 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3080 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3081 widest_int cst
3082 = wi::mul (bestn->simdclone->args[i].linear_step,
3083 j * nunits);
3084 tree tcst = wide_int_to_tree (type, cst);
b731b390 3085 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3086 new_stmt = gimple_build_assign (new_temp, code,
3087 arginfo[i].op, tcst);
0136f8f0
AH
3088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3089 vargs.safe_push (new_temp);
3090 }
3091 break;
3092 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3093 default:
3094 gcc_unreachable ();
3095 }
3096 }
3097
3098 new_stmt = gimple_build_call_vec (fndecl, vargs);
3099 if (vec_dest)
3100 {
3101 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3102 if (ratype)
b731b390 3103 new_temp = create_tmp_var (ratype);
0136f8f0
AH
3104 else if (TYPE_VECTOR_SUBPARTS (vectype)
3105 == TYPE_VECTOR_SUBPARTS (rtype))
3106 new_temp = make_ssa_name (vec_dest, new_stmt);
3107 else
3108 new_temp = make_ssa_name (rtype, new_stmt);
3109 gimple_call_set_lhs (new_stmt, new_temp);
3110 }
3111 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3112
3113 if (vec_dest)
3114 {
3115 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3116 {
3117 unsigned int k, l;
3118 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3119 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3120 gcc_assert ((k & (k - 1)) == 0);
3121 for (l = 0; l < k; l++)
3122 {
3123 tree t;
3124 if (ratype)
3125 {
3126 t = build_fold_addr_expr (new_temp);
3127 t = build2 (MEM_REF, vectype, t,
3128 build_int_cst (TREE_TYPE (t),
3129 l * prec / BITS_PER_UNIT));
3130 }
3131 else
3132 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3133 size_int (prec), bitsize_int (l * prec));
3134 new_stmt
b731b390 3135 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3137 if (j == 0 && l == 0)
3138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3139 else
3140 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3141
3142 prev_stmt_info = vinfo_for_stmt (new_stmt);
3143 }
3144
3145 if (ratype)
3146 {
3147 tree clobber = build_constructor (ratype, NULL);
3148 TREE_THIS_VOLATILE (clobber) = 1;
3149 new_stmt = gimple_build_assign (new_temp, clobber);
3150 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3151 }
3152 continue;
3153 }
3154 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3155 {
3156 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3157 / TYPE_VECTOR_SUBPARTS (rtype));
3158 gcc_assert ((k & (k - 1)) == 0);
3159 if ((j & (k - 1)) == 0)
3160 vec_alloc (ret_ctor_elts, k);
3161 if (ratype)
3162 {
3163 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3164 for (m = 0; m < o; m++)
3165 {
3166 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3167 size_int (m), NULL_TREE, NULL_TREE);
3168 new_stmt
b731b390 3169 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3171 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3172 gimple_assign_lhs (new_stmt));
3173 }
3174 tree clobber = build_constructor (ratype, NULL);
3175 TREE_THIS_VOLATILE (clobber) = 1;
3176 new_stmt = gimple_build_assign (new_temp, clobber);
3177 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3178 }
3179 else
3180 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3181 if ((j & (k - 1)) != k - 1)
3182 continue;
3183 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3184 new_stmt
b731b390 3185 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187
3188 if ((unsigned) j == k - 1)
3189 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3190 else
3191 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3192
3193 prev_stmt_info = vinfo_for_stmt (new_stmt);
3194 continue;
3195 }
3196 else if (ratype)
3197 {
3198 tree t = build_fold_addr_expr (new_temp);
3199 t = build2 (MEM_REF, vectype, t,
3200 build_int_cst (TREE_TYPE (t), 0));
3201 new_stmt
b731b390 3202 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3204 tree clobber = build_constructor (ratype, NULL);
3205 TREE_THIS_VOLATILE (clobber) = 1;
3206 vect_finish_stmt_generation (stmt,
3207 gimple_build_assign (new_temp,
3208 clobber), gsi);
3209 }
3210 }
3211
3212 if (j == 0)
3213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3214 else
3215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3216
3217 prev_stmt_info = vinfo_for_stmt (new_stmt);
3218 }
3219
3220 vargs.release ();
3221
3222 /* The call in STMT might prevent it from being removed in dce.
3223 We however cannot remove it here, due to the way the ssa name
3224 it defines is mapped to the new definition. So just replace
3225 rhs of the statement with something harmless. */
3226
3227 if (slp_node)
3228 return true;
3229
3230 if (scalar_dest)
3231 {
3232 type = TREE_TYPE (scalar_dest);
3233 if (is_pattern_stmt_p (stmt_info))
3234 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3235 else
3236 lhs = gimple_call_lhs (stmt);
3237 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3238 }
3239 else
3240 new_stmt = gimple_build_nop ();
3241 set_vinfo_for_stmt (new_stmt, stmt_info);
3242 set_vinfo_for_stmt (stmt, NULL);
3243 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3244 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3245 unlink_stmt_vdef (stmt);
3246
3247 return true;
3248}
3249
3250
ebfd146a
IR
3251/* Function vect_gen_widened_results_half
3252
3253 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 3254 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3255 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
3256 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3257 needs to be created (DECL is a function-decl of a target-builtin).
3258 STMT is the original scalar stmt that we are vectorizing. */
3259
3260static gimple
3261vect_gen_widened_results_half (enum tree_code code,
3262 tree decl,
3263 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3264 tree vec_dest, gimple_stmt_iterator *gsi,
3265 gimple stmt)
b8698a0f 3266{
ebfd146a 3267 gimple new_stmt;
b8698a0f
L
3268 tree new_temp;
3269
3270 /* Generate half of the widened result: */
3271 if (code == CALL_EXPR)
3272 {
3273 /* Target specific support */
ebfd146a
IR
3274 if (op_type == binary_op)
3275 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3276 else
3277 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3278 new_temp = make_ssa_name (vec_dest, new_stmt);
3279 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3280 }
3281 else
ebfd146a 3282 {
b8698a0f
L
3283 /* Generic support */
3284 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3285 if (op_type != binary_op)
3286 vec_oprnd1 = NULL;
0d0e4a03 3287 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3288 new_temp = make_ssa_name (vec_dest, new_stmt);
3289 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3290 }
ebfd146a
IR
3291 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3292
ebfd146a
IR
3293 return new_stmt;
3294}
3295
4a00c761
JJ
3296
3297/* Get vectorized definitions for loop-based vectorization. For the first
3298 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3299 scalar operand), and for the rest we get a copy with
3300 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3301 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3302 The vectors are collected into VEC_OPRNDS. */
3303
3304static void
3305vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
9771b263 3306 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3307{
3308 tree vec_oprnd;
3309
3310 /* Get first vector operand. */
3311 /* All the vector operands except the very first one (that is scalar oprnd)
3312 are stmt copies. */
3313 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3314 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3315 else
3316 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3317
9771b263 3318 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3319
3320 /* Get second vector operand. */
3321 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3322 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3323
3324 *oprnd = vec_oprnd;
3325
3326 /* For conversion in multiple steps, continue to get operands
3327 recursively. */
3328 if (multi_step_cvt)
3329 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3330}
3331
3332
3333/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3334 For multi-step conversions store the resulting vectors and call the function
3335 recursively. */
3336
3337static void
9771b263 3338vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4a00c761 3339 int multi_step_cvt, gimple stmt,
9771b263 3340 vec<tree> vec_dsts,
4a00c761
JJ
3341 gimple_stmt_iterator *gsi,
3342 slp_tree slp_node, enum tree_code code,
3343 stmt_vec_info *prev_stmt_info)
3344{
3345 unsigned int i;
3346 tree vop0, vop1, new_tmp, vec_dest;
3347 gimple new_stmt;
3348 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3349
9771b263 3350 vec_dest = vec_dsts.pop ();
4a00c761 3351
9771b263 3352 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3353 {
3354 /* Create demotion operation. */
9771b263
DN
3355 vop0 = (*vec_oprnds)[i];
3356 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3357 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
3358 new_tmp = make_ssa_name (vec_dest, new_stmt);
3359 gimple_assign_set_lhs (new_stmt, new_tmp);
3360 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361
3362 if (multi_step_cvt)
3363 /* Store the resulting vector for next recursive call. */
9771b263 3364 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3365 else
3366 {
3367 /* This is the last step of the conversion sequence. Store the
3368 vectors in SLP_NODE or in vector info of the scalar statement
3369 (or in STMT_VINFO_RELATED_STMT chain). */
3370 if (slp_node)
9771b263 3371 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
3372 else
3373 {
3374 if (!*prev_stmt_info)
3375 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3376 else
3377 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3378
3379 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3380 }
3381 }
3382 }
3383
3384 /* For multi-step demotion operations we first generate demotion operations
3385 from the source type to the intermediate types, and then combine the
3386 results (stored in VEC_OPRNDS) in demotion operation to the destination
3387 type. */
3388 if (multi_step_cvt)
3389 {
3390 /* At each level of recursion we have half of the operands we had at the
3391 previous level. */
9771b263 3392 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
3393 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3394 stmt, vec_dsts, gsi, slp_node,
3395 VEC_PACK_TRUNC_EXPR,
3396 prev_stmt_info);
3397 }
3398
9771b263 3399 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
3400}
3401
3402
3403/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3404 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3405 the resulting vectors and call the function recursively. */
3406
3407static void
9771b263
DN
3408vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3409 vec<tree> *vec_oprnds1,
4a00c761
JJ
3410 gimple stmt, tree vec_dest,
3411 gimple_stmt_iterator *gsi,
3412 enum tree_code code1,
3413 enum tree_code code2, tree decl1,
3414 tree decl2, int op_type)
3415{
3416 int i;
3417 tree vop0, vop1, new_tmp1, new_tmp2;
3418 gimple new_stmt1, new_stmt2;
6e1aa848 3419 vec<tree> vec_tmp = vNULL;
4a00c761 3420
9771b263
DN
3421 vec_tmp.create (vec_oprnds0->length () * 2);
3422 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
3423 {
3424 if (op_type == binary_op)
9771b263 3425 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
3426 else
3427 vop1 = NULL_TREE;
3428
3429 /* Generate the two halves of promotion operation. */
3430 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3431 op_type, vec_dest, gsi, stmt);
3432 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3433 op_type, vec_dest, gsi, stmt);
3434 if (is_gimple_call (new_stmt1))
3435 {
3436 new_tmp1 = gimple_call_lhs (new_stmt1);
3437 new_tmp2 = gimple_call_lhs (new_stmt2);
3438 }
3439 else
3440 {
3441 new_tmp1 = gimple_assign_lhs (new_stmt1);
3442 new_tmp2 = gimple_assign_lhs (new_stmt2);
3443 }
3444
3445 /* Store the results for the next step. */
9771b263
DN
3446 vec_tmp.quick_push (new_tmp1);
3447 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
3448 }
3449
689eaba3 3450 vec_oprnds0->release ();
4a00c761
JJ
3451 *vec_oprnds0 = vec_tmp;
3452}
3453
3454
b8698a0f
L
3455/* Check if STMT performs a conversion operation, that can be vectorized.
3456 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 3457 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
3458 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3459
3460static bool
3461vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3462 gimple *vec_stmt, slp_tree slp_node)
3463{
3464 tree vec_dest;
3465 tree scalar_dest;
4a00c761 3466 tree op0, op1 = NULL_TREE;
ebfd146a
IR
3467 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3468 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3469 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3470 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 3471 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
3472 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3473 tree new_temp;
3474 tree def;
3475 gimple def_stmt;
3476 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3477 gimple new_stmt = NULL;
3478 stmt_vec_info prev_stmt_info;
3479 int nunits_in;
3480 int nunits_out;
3481 tree vectype_out, vectype_in;
4a00c761
JJ
3482 int ncopies, i, j;
3483 tree lhs_type, rhs_type;
ebfd146a 3484 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
3485 vec<tree> vec_oprnds0 = vNULL;
3486 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 3487 tree vop0;
4a00c761
JJ
3488 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3489 int multi_step_cvt = 0;
6e1aa848
DN
3490 vec<tree> vec_dsts = vNULL;
3491 vec<tree> interm_types = vNULL;
4a00c761
JJ
3492 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3493 int op_type;
ef4bddc2 3494 machine_mode rhs_mode;
4a00c761 3495 unsigned short fltsz;
ebfd146a
IR
3496
3497 /* Is STMT a vectorizable conversion? */
3498
4a00c761 3499 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3500 return false;
3501
8644a673 3502 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3503 return false;
3504
3505 if (!is_gimple_assign (stmt))
3506 return false;
3507
3508 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3509 return false;
3510
3511 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
3512 if (!CONVERT_EXPR_CODE_P (code)
3513 && code != FIX_TRUNC_EXPR
3514 && code != FLOAT_EXPR
3515 && code != WIDEN_MULT_EXPR
3516 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
3517 return false;
3518
4a00c761
JJ
3519 op_type = TREE_CODE_LENGTH (code);
3520
ebfd146a 3521 /* Check types of lhs and rhs. */
b690cc0f 3522 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 3523 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
3524 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3525
ebfd146a
IR
3526 op0 = gimple_assign_rhs1 (stmt);
3527 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
3528
3529 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3530 && !((INTEGRAL_TYPE_P (lhs_type)
3531 && INTEGRAL_TYPE_P (rhs_type))
3532 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3533 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3534 return false;
3535
3536 if ((INTEGRAL_TYPE_P (lhs_type)
3537 && (TYPE_PRECISION (lhs_type)
3538 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3539 || (INTEGRAL_TYPE_P (rhs_type)
3540 && (TYPE_PRECISION (rhs_type)
3541 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3542 {
73fbfcad 3543 if (dump_enabled_p ())
78c60e3d 3544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
3545 "type conversion to/from bit-precision unsupported."
3546 "\n");
4a00c761
JJ
3547 return false;
3548 }
3549
b690cc0f 3550 /* Check the operands of the operation. */
24ee1384 3551 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f
RG
3552 &def_stmt, &def, &dt[0], &vectype_in))
3553 {
73fbfcad 3554 if (dump_enabled_p ())
78c60e3d 3555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3556 "use not simple.\n");
b690cc0f
RG
3557 return false;
3558 }
4a00c761
JJ
3559 if (op_type == binary_op)
3560 {
3561 bool ok;
3562
3563 op1 = gimple_assign_rhs2 (stmt);
3564 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3565 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3566 OP1. */
3567 if (CONSTANT_CLASS_P (op0))
f5709183 3568 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
4a00c761
JJ
3569 &def_stmt, &def, &dt[1], &vectype_in);
3570 else
f5709183 3571 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 3572 &def, &dt[1]);
4a00c761
JJ
3573
3574 if (!ok)
3575 {
73fbfcad 3576 if (dump_enabled_p ())
78c60e3d 3577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3578 "use not simple.\n");
4a00c761
JJ
3579 return false;
3580 }
3581 }
3582
b690cc0f
RG
3583 /* If op0 is an external or constant defs use a vector type of
3584 the same size as the output vector type. */
ebfd146a 3585 if (!vectype_in)
b690cc0f 3586 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3587 if (vec_stmt)
3588 gcc_assert (vectype_in);
3589 if (!vectype_in)
3590 {
73fbfcad 3591 if (dump_enabled_p ())
4a00c761 3592 {
78c60e3d
SS
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594 "no vectype for scalar type ");
3595 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3596 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 3597 }
7d8930a0
IR
3598
3599 return false;
3600 }
ebfd146a 3601
b690cc0f
RG
3602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3603 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 3604 if (nunits_in < nunits_out)
ebfd146a
IR
3605 modifier = NARROW;
3606 else if (nunits_out == nunits_in)
3607 modifier = NONE;
ebfd146a 3608 else
4a00c761 3609 modifier = WIDEN;
ebfd146a 3610
ff802fa1
IR
3611 /* Multiple types in SLP are handled by creating the appropriate number of
3612 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3613 case of SLP. */
437f4a00 3614 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 3615 ncopies = 1;
4a00c761
JJ
3616 else if (modifier == NARROW)
3617 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3618 else
3619 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 3620
ebfd146a
IR
3621 /* Sanity check: make sure that at least one copy of the vectorized stmt
3622 needs to be generated. */
3623 gcc_assert (ncopies >= 1);
3624
ebfd146a 3625 /* Supportable by target? */
4a00c761 3626 switch (modifier)
ebfd146a 3627 {
4a00c761
JJ
3628 case NONE:
3629 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3630 return false;
3631 if (supportable_convert_operation (code, vectype_out, vectype_in,
3632 &decl1, &code1))
3633 break;
3634 /* FALLTHRU */
3635 unsupported:
73fbfcad 3636 if (dump_enabled_p ())
78c60e3d 3637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3638 "conversion not supported by target.\n");
ebfd146a 3639 return false;
ebfd146a 3640
4a00c761
JJ
3641 case WIDEN:
3642 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
3643 &code1, &code2, &multi_step_cvt,
3644 &interm_types))
4a00c761
JJ
3645 {
3646 /* Binary widening operation can only be supported directly by the
3647 architecture. */
3648 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3649 break;
3650 }
3651
3652 if (code != FLOAT_EXPR
3653 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3654 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3655 goto unsupported;
3656
3657 rhs_mode = TYPE_MODE (rhs_type);
3658 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3659 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3660 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3661 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3662 {
3663 cvt_type
3664 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3665 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3666 if (cvt_type == NULL_TREE)
3667 goto unsupported;
3668
3669 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3670 {
3671 if (!supportable_convert_operation (code, vectype_out,
3672 cvt_type, &decl1, &codecvt1))
3673 goto unsupported;
3674 }
3675 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
3676 cvt_type, &codecvt1,
3677 &codecvt2, &multi_step_cvt,
4a00c761
JJ
3678 &interm_types))
3679 continue;
3680 else
3681 gcc_assert (multi_step_cvt == 0);
3682
3683 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
3684 vectype_in, &code1, &code2,
3685 &multi_step_cvt, &interm_types))
4a00c761
JJ
3686 break;
3687 }
3688
3689 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3690 goto unsupported;
3691
3692 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3693 codecvt2 = ERROR_MARK;
3694 else
3695 {
3696 multi_step_cvt++;
9771b263 3697 interm_types.safe_push (cvt_type);
4a00c761
JJ
3698 cvt_type = NULL_TREE;
3699 }
3700 break;
3701
3702 case NARROW:
3703 gcc_assert (op_type == unary_op);
3704 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3705 &code1, &multi_step_cvt,
3706 &interm_types))
3707 break;
3708
3709 if (code != FIX_TRUNC_EXPR
3710 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3711 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3712 goto unsupported;
3713
3714 rhs_mode = TYPE_MODE (rhs_type);
3715 cvt_type
3716 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3717 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3718 if (cvt_type == NULL_TREE)
3719 goto unsupported;
3720 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3721 &decl1, &codecvt1))
3722 goto unsupported;
3723 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3724 &code1, &multi_step_cvt,
3725 &interm_types))
3726 break;
3727 goto unsupported;
3728
3729 default:
3730 gcc_unreachable ();
ebfd146a
IR
3731 }
3732
3733 if (!vec_stmt) /* transformation not required. */
3734 {
73fbfcad 3735 if (dump_enabled_p ())
78c60e3d 3736 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3737 "=== vectorizable_conversion ===\n");
4a00c761 3738 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
3739 {
3740 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
c3e7ee41 3741 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
8bd37302 3742 }
4a00c761
JJ
3743 else if (modifier == NARROW)
3744 {
3745 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 3746 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
3747 }
3748 else
3749 {
3750 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 3751 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 3752 }
9771b263 3753 interm_types.release ();
ebfd146a
IR
3754 return true;
3755 }
3756
3757 /** Transform. **/
73fbfcad 3758 if (dump_enabled_p ())
78c60e3d 3759 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3760 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 3761
4a00c761
JJ
3762 if (op_type == binary_op)
3763 {
3764 if (CONSTANT_CLASS_P (op0))
3765 op0 = fold_convert (TREE_TYPE (op1), op0);
3766 else if (CONSTANT_CLASS_P (op1))
3767 op1 = fold_convert (TREE_TYPE (op0), op1);
3768 }
3769
3770 /* In case of multi-step conversion, we first generate conversion operations
3771 to the intermediate types, and then from that types to the final one.
3772 We create vector destinations for the intermediate type (TYPES) received
3773 from supportable_*_operation, and store them in the correct order
3774 for future use in vect_create_vectorized_*_stmts (). */
9771b263 3775 vec_dsts.create (multi_step_cvt + 1);
82294ec1
JJ
3776 vec_dest = vect_create_destination_var (scalar_dest,
3777 (cvt_type && modifier == WIDEN)
3778 ? cvt_type : vectype_out);
9771b263 3779 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
3780
3781 if (multi_step_cvt)
3782 {
9771b263
DN
3783 for (i = interm_types.length () - 1;
3784 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
3785 {
3786 vec_dest = vect_create_destination_var (scalar_dest,
3787 intermediate_type);
9771b263 3788 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
3789 }
3790 }
ebfd146a 3791
4a00c761 3792 if (cvt_type)
82294ec1
JJ
3793 vec_dest = vect_create_destination_var (scalar_dest,
3794 modifier == WIDEN
3795 ? vectype_out : cvt_type);
4a00c761
JJ
3796
3797 if (!slp_node)
3798 {
30862efc 3799 if (modifier == WIDEN)
4a00c761 3800 {
c3284718 3801 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 3802 if (op_type == binary_op)
9771b263 3803 vec_oprnds1.create (1);
4a00c761 3804 }
30862efc 3805 else if (modifier == NARROW)
9771b263
DN
3806 vec_oprnds0.create (
3807 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
3808 }
3809 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 3810 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 3811
4a00c761 3812 last_oprnd = op0;
ebfd146a
IR
3813 prev_stmt_info = NULL;
3814 switch (modifier)
3815 {
3816 case NONE:
3817 for (j = 0; j < ncopies; j++)
3818 {
ebfd146a 3819 if (j == 0)
d092494c
IR
3820 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3821 -1);
ebfd146a
IR
3822 else
3823 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3824
9771b263 3825 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
3826 {
3827 /* Arguments are ready, create the new vector stmt. */
3828 if (code1 == CALL_EXPR)
3829 {
3830 new_stmt = gimple_build_call (decl1, 1, vop0);
3831 new_temp = make_ssa_name (vec_dest, new_stmt);
3832 gimple_call_set_lhs (new_stmt, new_temp);
3833 }
3834 else
3835 {
3836 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 3837 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
3838 new_temp = make_ssa_name (vec_dest, new_stmt);
3839 gimple_assign_set_lhs (new_stmt, new_temp);
3840 }
3841
3842 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3843 if (slp_node)
9771b263 3844 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
3845 }
3846
ebfd146a
IR
3847 if (j == 0)
3848 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3849 else
3850 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3851 prev_stmt_info = vinfo_for_stmt (new_stmt);
3852 }
3853 break;
3854
3855 case WIDEN:
3856 /* In case the vectorization factor (VF) is bigger than the number
3857 of elements that we can fit in a vectype (nunits), we have to
3858 generate more than one vector stmt - i.e - we need to "unroll"
3859 the vector stmt by a factor VF/nunits. */
3860 for (j = 0; j < ncopies; j++)
3861 {
4a00c761 3862 /* Handle uses. */
ebfd146a 3863 if (j == 0)
4a00c761
JJ
3864 {
3865 if (slp_node)
3866 {
3867 if (code == WIDEN_LSHIFT_EXPR)
3868 {
3869 unsigned int k;
ebfd146a 3870
4a00c761
JJ
3871 vec_oprnd1 = op1;
3872 /* Store vec_oprnd1 for every vector stmt to be created
3873 for SLP_NODE. We check during the analysis that all
3874 the shift arguments are the same. */
3875 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 3876 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
3877
3878 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3879 slp_node, -1);
3880 }
3881 else
3882 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3883 &vec_oprnds1, slp_node, -1);
3884 }
3885 else
3886 {
3887 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 3888 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
3889 if (op_type == binary_op)
3890 {
3891 if (code == WIDEN_LSHIFT_EXPR)
3892 vec_oprnd1 = op1;
3893 else
3894 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3895 NULL);
9771b263 3896 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
3897 }
3898 }
3899 }
ebfd146a 3900 else
4a00c761
JJ
3901 {
3902 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
3903 vec_oprnds0.truncate (0);
3904 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
3905 if (op_type == binary_op)
3906 {
3907 if (code == WIDEN_LSHIFT_EXPR)
3908 vec_oprnd1 = op1;
3909 else
3910 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3911 vec_oprnd1);
9771b263
DN
3912 vec_oprnds1.truncate (0);
3913 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
3914 }
3915 }
ebfd146a 3916
4a00c761
JJ
3917 /* Arguments are ready. Create the new vector stmts. */
3918 for (i = multi_step_cvt; i >= 0; i--)
3919 {
9771b263 3920 tree this_dest = vec_dsts[i];
4a00c761
JJ
3921 enum tree_code c1 = code1, c2 = code2;
3922 if (i == 0 && codecvt2 != ERROR_MARK)
3923 {
3924 c1 = codecvt1;
3925 c2 = codecvt2;
3926 }
3927 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3928 &vec_oprnds1,
3929 stmt, this_dest, gsi,
3930 c1, c2, decl1, decl2,
3931 op_type);
3932 }
3933
9771b263 3934 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
3935 {
3936 if (cvt_type)
3937 {
3938 if (codecvt1 == CALL_EXPR)
3939 {
3940 new_stmt = gimple_build_call (decl1, 1, vop0);
3941 new_temp = make_ssa_name (vec_dest, new_stmt);
3942 gimple_call_set_lhs (new_stmt, new_temp);
3943 }
3944 else
3945 {
3946 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 3947 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
3948 new_stmt = gimple_build_assign (new_temp, codecvt1,
3949 vop0);
4a00c761
JJ
3950 }
3951
3952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3953 }
3954 else
3955 new_stmt = SSA_NAME_DEF_STMT (vop0);
3956
3957 if (slp_node)
9771b263 3958 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
3959 else
3960 {
3961 if (!prev_stmt_info)
3962 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3963 else
3964 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3965 prev_stmt_info = vinfo_for_stmt (new_stmt);
3966 }
3967 }
ebfd146a 3968 }
4a00c761
JJ
3969
3970 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
3971 break;
3972
3973 case NARROW:
3974 /* In case the vectorization factor (VF) is bigger than the number
3975 of elements that we can fit in a vectype (nunits), we have to
3976 generate more than one vector stmt - i.e - we need to "unroll"
3977 the vector stmt by a factor VF/nunits. */
3978 for (j = 0; j < ncopies; j++)
3979 {
3980 /* Handle uses. */
4a00c761
JJ
3981 if (slp_node)
3982 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3983 slp_node, -1);
ebfd146a
IR
3984 else
3985 {
9771b263 3986 vec_oprnds0.truncate (0);
4a00c761
JJ
3987 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3988 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
3989 }
3990
4a00c761
JJ
3991 /* Arguments are ready. Create the new vector stmts. */
3992 if (cvt_type)
9771b263 3993 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
3994 {
3995 if (codecvt1 == CALL_EXPR)
3996 {
3997 new_stmt = gimple_build_call (decl1, 1, vop0);
3998 new_temp = make_ssa_name (vec_dest, new_stmt);
3999 gimple_call_set_lhs (new_stmt, new_temp);
4000 }
4001 else
4002 {
4003 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4004 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4005 new_stmt = gimple_build_assign (new_temp, codecvt1,
4006 vop0);
4a00c761 4007 }
ebfd146a 4008
4a00c761 4009 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4010 vec_oprnds0[i] = new_temp;
4a00c761 4011 }
ebfd146a 4012
4a00c761
JJ
4013 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4014 stmt, vec_dsts, gsi,
4015 slp_node, code1,
4016 &prev_stmt_info);
ebfd146a
IR
4017 }
4018
4019 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4020 break;
ebfd146a
IR
4021 }
4022
9771b263
DN
4023 vec_oprnds0.release ();
4024 vec_oprnds1.release ();
4025 vec_dsts.release ();
4026 interm_types.release ();
ebfd146a
IR
4027
4028 return true;
4029}
ff802fa1
IR
4030
4031
ebfd146a
IR
4032/* Function vectorizable_assignment.
4033
b8698a0f
L
4034 Check if STMT performs an assignment (copy) that can be vectorized.
4035 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4036 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4037 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4038
4039static bool
4040vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4041 gimple *vec_stmt, slp_tree slp_node)
4042{
4043 tree vec_dest;
4044 tree scalar_dest;
4045 tree op;
4046 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4047 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4048 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4049 tree new_temp;
4050 tree def;
4051 gimple def_stmt;
4052 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 4053 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 4054 int ncopies;
f18b55bd 4055 int i, j;
6e1aa848 4056 vec<tree> vec_oprnds = vNULL;
ebfd146a 4057 tree vop;
a70d6342 4058 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
4059 gimple new_stmt = NULL;
4060 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4061 enum tree_code code;
4062 tree vectype_in;
ebfd146a
IR
4063
4064 /* Multiple types in SLP are handled by creating the appropriate number of
4065 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4066 case of SLP. */
437f4a00 4067 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4068 ncopies = 1;
4069 else
4070 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4071
4072 gcc_assert (ncopies >= 1);
ebfd146a 4073
a70d6342 4074 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4075 return false;
4076
8644a673 4077 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4078 return false;
4079
4080 /* Is vectorizable assignment? */
4081 if (!is_gimple_assign (stmt))
4082 return false;
4083
4084 scalar_dest = gimple_assign_lhs (stmt);
4085 if (TREE_CODE (scalar_dest) != SSA_NAME)
4086 return false;
4087
fde9c428 4088 code = gimple_assign_rhs_code (stmt);
ebfd146a 4089 if (gimple_assign_single_p (stmt)
fde9c428
RG
4090 || code == PAREN_EXPR
4091 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4092 op = gimple_assign_rhs1 (stmt);
4093 else
4094 return false;
4095
7b7ec6c5
RG
4096 if (code == VIEW_CONVERT_EXPR)
4097 op = TREE_OPERAND (op, 0);
4098
24ee1384 4099 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
fde9c428 4100 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a 4101 {
73fbfcad 4102 if (dump_enabled_p ())
78c60e3d 4103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4104 "use not simple.\n");
ebfd146a
IR
4105 return false;
4106 }
4107
fde9c428
RG
4108 /* We can handle NOP_EXPR conversions that do not change the number
4109 of elements or the vector size. */
7b7ec6c5
RG
4110 if ((CONVERT_EXPR_CODE_P (code)
4111 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
4112 && (!vectype_in
4113 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4114 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4115 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4116 return false;
4117
7b7b1813
RG
4118 /* We do not handle bit-precision changes. */
4119 if ((CONVERT_EXPR_CODE_P (code)
4120 || code == VIEW_CONVERT_EXPR)
4121 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4122 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4123 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4124 || ((TYPE_PRECISION (TREE_TYPE (op))
4125 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4126 /* But a conversion that does not change the bit-pattern is ok. */
4127 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4128 > TYPE_PRECISION (TREE_TYPE (op)))
4129 && TYPE_UNSIGNED (TREE_TYPE (op))))
4130 {
73fbfcad 4131 if (dump_enabled_p ())
78c60e3d
SS
4132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4133 "type conversion to/from bit-precision "
e645e942 4134 "unsupported.\n");
7b7b1813
RG
4135 return false;
4136 }
4137
ebfd146a
IR
4138 if (!vec_stmt) /* transformation not required. */
4139 {
4140 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4141 if (dump_enabled_p ())
78c60e3d 4142 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4143 "=== vectorizable_assignment ===\n");
c3e7ee41 4144 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
4145 return true;
4146 }
4147
4148 /** Transform. **/
73fbfcad 4149 if (dump_enabled_p ())
e645e942 4150 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4151
4152 /* Handle def. */
4153 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4154
4155 /* Handle use. */
f18b55bd 4156 for (j = 0; j < ncopies; j++)
ebfd146a 4157 {
f18b55bd
IR
4158 /* Handle uses. */
4159 if (j == 0)
d092494c 4160 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
f18b55bd
IR
4161 else
4162 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4163
4164 /* Arguments are ready. create the new vector stmt. */
9771b263 4165 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4166 {
7b7ec6c5
RG
4167 if (CONVERT_EXPR_CODE_P (code)
4168 || code == VIEW_CONVERT_EXPR)
4a73490d 4169 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4170 new_stmt = gimple_build_assign (vec_dest, vop);
4171 new_temp = make_ssa_name (vec_dest, new_stmt);
4172 gimple_assign_set_lhs (new_stmt, new_temp);
4173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4174 if (slp_node)
9771b263 4175 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4176 }
ebfd146a
IR
4177
4178 if (slp_node)
f18b55bd
IR
4179 continue;
4180
4181 if (j == 0)
4182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4183 else
4184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185
4186 prev_stmt_info = vinfo_for_stmt (new_stmt);
4187 }
b8698a0f 4188
9771b263 4189 vec_oprnds.release ();
ebfd146a
IR
4190 return true;
4191}
4192
9dc3f7de 4193
1107f3ae
IR
4194/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4195 either as shift by a scalar or by a vector. */
4196
4197bool
4198vect_supportable_shift (enum tree_code code, tree scalar_type)
4199{
4200
ef4bddc2 4201 machine_mode vec_mode;
1107f3ae
IR
4202 optab optab;
4203 int icode;
4204 tree vectype;
4205
4206 vectype = get_vectype_for_scalar_type (scalar_type);
4207 if (!vectype)
4208 return false;
4209
4210 optab = optab_for_tree_code (code, vectype, optab_scalar);
4211 if (!optab
4212 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4213 {
4214 optab = optab_for_tree_code (code, vectype, optab_vector);
4215 if (!optab
4216 || (optab_handler (optab, TYPE_MODE (vectype))
4217 == CODE_FOR_nothing))
4218 return false;
4219 }
4220
4221 vec_mode = TYPE_MODE (vectype);
4222 icode = (int) optab_handler (optab, vec_mode);
4223 if (icode == CODE_FOR_nothing)
4224 return false;
4225
4226 return true;
4227}
4228
4229
9dc3f7de
IR
4230/* Function vectorizable_shift.
4231
4232 Check if STMT performs a shift operation that can be vectorized.
4233 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4234 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4235 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4236
4237static bool
4238vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4239 gimple *vec_stmt, slp_tree slp_node)
4240{
4241 tree vec_dest;
4242 tree scalar_dest;
4243 tree op0, op1 = NULL;
4244 tree vec_oprnd1 = NULL_TREE;
4245 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4246 tree vectype;
4247 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4248 enum tree_code code;
ef4bddc2 4249 machine_mode vec_mode;
9dc3f7de
IR
4250 tree new_temp;
4251 optab optab;
4252 int icode;
ef4bddc2 4253 machine_mode optab_op2_mode;
9dc3f7de
IR
4254 tree def;
4255 gimple def_stmt;
4256 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4257 gimple new_stmt = NULL;
4258 stmt_vec_info prev_stmt_info;
4259 int nunits_in;
4260 int nunits_out;
4261 tree vectype_out;
cede2577 4262 tree op1_vectype;
9dc3f7de
IR
4263 int ncopies;
4264 int j, i;
6e1aa848
DN
4265 vec<tree> vec_oprnds0 = vNULL;
4266 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4267 tree vop0, vop1;
4268 unsigned int k;
49eab32e 4269 bool scalar_shift_arg = true;
9dc3f7de
IR
4270 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4271 int vf;
4272
4273 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4274 return false;
4275
4276 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4277 return false;
4278
4279 /* Is STMT a vectorizable binary/unary operation? */
4280 if (!is_gimple_assign (stmt))
4281 return false;
4282
4283 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4284 return false;
4285
4286 code = gimple_assign_rhs_code (stmt);
4287
4288 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4289 || code == RROTATE_EXPR))
4290 return false;
4291
4292 scalar_dest = gimple_assign_lhs (stmt);
4293 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
4294 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4295 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4296 {
73fbfcad 4297 if (dump_enabled_p ())
78c60e3d 4298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4299 "bit-precision shifts not supported.\n");
7b7b1813
RG
4300 return false;
4301 }
9dc3f7de
IR
4302
4303 op0 = gimple_assign_rhs1 (stmt);
24ee1384 4304 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
4305 &def_stmt, &def, &dt[0], &vectype))
4306 {
73fbfcad 4307 if (dump_enabled_p ())
78c60e3d 4308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4309 "use not simple.\n");
9dc3f7de
IR
4310 return false;
4311 }
4312 /* If op0 is an external or constant def use a vector type with
4313 the same size as the output vector type. */
4314 if (!vectype)
4315 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4316 if (vec_stmt)
4317 gcc_assert (vectype);
4318 if (!vectype)
4319 {
73fbfcad 4320 if (dump_enabled_p ())
78c60e3d 4321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4322 "no vectype for scalar type\n");
9dc3f7de
IR
4323 return false;
4324 }
4325
4326 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4327 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4328 if (nunits_out != nunits_in)
4329 return false;
4330
4331 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
4332 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4333 &def, &dt[1], &op1_vectype))
9dc3f7de 4334 {
73fbfcad 4335 if (dump_enabled_p ())
78c60e3d 4336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4337 "use not simple.\n");
9dc3f7de
IR
4338 return false;
4339 }
4340
4341 if (loop_vinfo)
4342 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4343 else
4344 vf = 1;
4345
4346 /* Multiple types in SLP are handled by creating the appropriate number of
4347 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4348 case of SLP. */
437f4a00 4349 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
4350 ncopies = 1;
4351 else
4352 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4353
4354 gcc_assert (ncopies >= 1);
4355
4356 /* Determine whether the shift amount is a vector, or scalar. If the
4357 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4358
49eab32e
JJ
4359 if (dt[1] == vect_internal_def && !slp_node)
4360 scalar_shift_arg = false;
4361 else if (dt[1] == vect_constant_def
4362 || dt[1] == vect_external_def
4363 || dt[1] == vect_internal_def)
4364 {
4365 /* In SLP, need to check whether the shift count is the same,
4366 in loops if it is a constant or invariant, it is always
4367 a scalar shift. */
4368 if (slp_node)
4369 {
9771b263 4370 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
4371 gimple slpstmt;
4372
9771b263 4373 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
4374 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4375 scalar_shift_arg = false;
4376 }
4377 }
4378 else
4379 {
73fbfcad 4380 if (dump_enabled_p ())
78c60e3d 4381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4382 "operand mode requires invariant argument.\n");
49eab32e
JJ
4383 return false;
4384 }
4385
9dc3f7de 4386 /* Vector shifted by vector. */
49eab32e 4387 if (!scalar_shift_arg)
9dc3f7de
IR
4388 {
4389 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 4390 if (dump_enabled_p ())
78c60e3d 4391 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4392 "vector/vector shift/rotate found.\n");
78c60e3d 4393
aa948027
JJ
4394 if (!op1_vectype)
4395 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4396 if (op1_vectype == NULL_TREE
4397 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 4398 {
73fbfcad 4399 if (dump_enabled_p ())
78c60e3d
SS
4400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4401 "unusable type for last operand in"
e645e942 4402 " vector/vector shift/rotate.\n");
cede2577
JJ
4403 return false;
4404 }
9dc3f7de
IR
4405 }
4406 /* See if the machine has a vector shifted by scalar insn and if not
4407 then see if it has a vector shifted by vector insn. */
49eab32e 4408 else
9dc3f7de
IR
4409 {
4410 optab = optab_for_tree_code (code, vectype, optab_scalar);
4411 if (optab
4412 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4413 {
73fbfcad 4414 if (dump_enabled_p ())
78c60e3d 4415 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4416 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
4417 }
4418 else
4419 {
4420 optab = optab_for_tree_code (code, vectype, optab_vector);
4421 if (optab
4422 && (optab_handler (optab, TYPE_MODE (vectype))
4423 != CODE_FOR_nothing))
4424 {
49eab32e
JJ
4425 scalar_shift_arg = false;
4426
73fbfcad 4427 if (dump_enabled_p ())
78c60e3d 4428 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4429 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
4430
4431 /* Unlike the other binary operators, shifts/rotates have
4432 the rhs being int, instead of the same type as the lhs,
4433 so make sure the scalar is the right type if we are
aa948027 4434 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
4435 if (dt[1] == vect_constant_def)
4436 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
4437 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4438 TREE_TYPE (op1)))
4439 {
4440 if (slp_node
4441 && TYPE_MODE (TREE_TYPE (vectype))
4442 != TYPE_MODE (TREE_TYPE (op1)))
4443 {
73fbfcad 4444 if (dump_enabled_p ())
78c60e3d
SS
4445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4446 "unusable type for last operand in"
e645e942 4447 " vector/vector shift/rotate.\n");
aa948027
JJ
4448 return false;
4449 }
4450 if (vec_stmt && !slp_node)
4451 {
4452 op1 = fold_convert (TREE_TYPE (vectype), op1);
4453 op1 = vect_init_vector (stmt, op1,
4454 TREE_TYPE (vectype), NULL);
4455 }
4456 }
9dc3f7de
IR
4457 }
4458 }
4459 }
9dc3f7de
IR
4460
4461 /* Supportable by target? */
4462 if (!optab)
4463 {
73fbfcad 4464 if (dump_enabled_p ())
78c60e3d 4465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4466 "no optab.\n");
9dc3f7de
IR
4467 return false;
4468 }
4469 vec_mode = TYPE_MODE (vectype);
4470 icode = (int) optab_handler (optab, vec_mode);
4471 if (icode == CODE_FOR_nothing)
4472 {
73fbfcad 4473 if (dump_enabled_p ())
78c60e3d 4474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4475 "op not supported by target.\n");
9dc3f7de
IR
4476 /* Check only during analysis. */
4477 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4478 || (vf < vect_min_worthwhile_factor (code)
4479 && !vec_stmt))
4480 return false;
73fbfcad 4481 if (dump_enabled_p ())
e645e942
TJ
4482 dump_printf_loc (MSG_NOTE, vect_location,
4483 "proceeding using word mode.\n");
9dc3f7de
IR
4484 }
4485
4486 /* Worthwhile without SIMD support? Check only during analysis. */
4487 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4488 && vf < vect_min_worthwhile_factor (code)
4489 && !vec_stmt)
4490 {
73fbfcad 4491 if (dump_enabled_p ())
78c60e3d 4492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4493 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
4494 return false;
4495 }
4496
4497 if (!vec_stmt) /* transformation not required. */
4498 {
4499 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 4500 if (dump_enabled_p ())
e645e942
TJ
4501 dump_printf_loc (MSG_NOTE, vect_location,
4502 "=== vectorizable_shift ===\n");
c3e7ee41 4503 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
9dc3f7de
IR
4504 return true;
4505 }
4506
4507 /** Transform. **/
4508
73fbfcad 4509 if (dump_enabled_p ())
78c60e3d 4510 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4511 "transform binary/unary operation.\n");
9dc3f7de
IR
4512
4513 /* Handle def. */
4514 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4515
9dc3f7de
IR
4516 prev_stmt_info = NULL;
4517 for (j = 0; j < ncopies; j++)
4518 {
4519 /* Handle uses. */
4520 if (j == 0)
4521 {
4522 if (scalar_shift_arg)
4523 {
4524 /* Vector shl and shr insn patterns can be defined with scalar
4525 operand 2 (shift operand). In this case, use constant or loop
4526 invariant op1 directly, without extending it to vector mode
4527 first. */
4528 optab_op2_mode = insn_data[icode].operand[2].mode;
4529 if (!VECTOR_MODE_P (optab_op2_mode))
4530 {
73fbfcad 4531 if (dump_enabled_p ())
78c60e3d 4532 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4533 "operand 1 using scalar mode.\n");
9dc3f7de 4534 vec_oprnd1 = op1;
8930f723 4535 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 4536 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
4537 if (slp_node)
4538 {
4539 /* Store vec_oprnd1 for every vector stmt to be created
4540 for SLP_NODE. We check during the analysis that all
4541 the shift arguments are the same.
4542 TODO: Allow different constants for different vector
4543 stmts generated for an SLP instance. */
4544 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4545 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
4546 }
4547 }
4548 }
4549
4550 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4551 (a special case for certain kind of vector shifts); otherwise,
4552 operand 1 should be of a vector type (the usual case). */
4553 if (vec_oprnd1)
4554 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 4555 slp_node, -1);
9dc3f7de
IR
4556 else
4557 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 4558 slp_node, -1);
9dc3f7de
IR
4559 }
4560 else
4561 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4562
4563 /* Arguments are ready. Create the new vector stmt. */
9771b263 4564 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 4565 {
9771b263 4566 vop1 = vec_oprnds1[i];
0d0e4a03 4567 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
4568 new_temp = make_ssa_name (vec_dest, new_stmt);
4569 gimple_assign_set_lhs (new_stmt, new_temp);
4570 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4571 if (slp_node)
9771b263 4572 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
4573 }
4574
4575 if (slp_node)
4576 continue;
4577
4578 if (j == 0)
4579 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4580 else
4581 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4582 prev_stmt_info = vinfo_for_stmt (new_stmt);
4583 }
4584
9771b263
DN
4585 vec_oprnds0.release ();
4586 vec_oprnds1.release ();
9dc3f7de
IR
4587
4588 return true;
4589}
4590
4591
ebfd146a
IR
4592/* Function vectorizable_operation.
4593
16949072
RG
4594 Check if STMT performs a binary, unary or ternary operation that can
4595 be vectorized.
b8698a0f 4596 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4597 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4598 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4599
4600static bool
4601vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4602 gimple *vec_stmt, slp_tree slp_node)
4603{
00f07b86 4604 tree vec_dest;
ebfd146a 4605 tree scalar_dest;
16949072 4606 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 4607 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 4608 tree vectype;
ebfd146a
IR
4609 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4610 enum tree_code code;
ef4bddc2 4611 machine_mode vec_mode;
ebfd146a
IR
4612 tree new_temp;
4613 int op_type;
00f07b86 4614 optab optab;
ebfd146a 4615 int icode;
ebfd146a
IR
4616 tree def;
4617 gimple def_stmt;
16949072
RG
4618 enum vect_def_type dt[3]
4619 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
4620 gimple new_stmt = NULL;
4621 stmt_vec_info prev_stmt_info;
b690cc0f 4622 int nunits_in;
ebfd146a
IR
4623 int nunits_out;
4624 tree vectype_out;
4625 int ncopies;
4626 int j, i;
6e1aa848
DN
4627 vec<tree> vec_oprnds0 = vNULL;
4628 vec<tree> vec_oprnds1 = vNULL;
4629 vec<tree> vec_oprnds2 = vNULL;
16949072 4630 tree vop0, vop1, vop2;
a70d6342
IR
4631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4632 int vf;
4633
a70d6342 4634 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4635 return false;
4636
8644a673 4637 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4638 return false;
4639
4640 /* Is STMT a vectorizable unary/binary/ternary operation? */
4641 if (!is_gimple_assign (stmt))
4642 return false;
4643
4644 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4645 return false;
4646
ebfd146a
IR
4647 code = gimple_assign_rhs_code (stmt);
4648
4649 /* For pointer addition, we should use the normal plus for
4650 the vector addition. */
4651 if (code == POINTER_PLUS_EXPR)
4652 code = PLUS_EXPR;
4653
4654 /* Support only unary, binary or ternary operations. */
4655 op_type = TREE_CODE_LENGTH (code);
16949072 4656 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 4657 {
73fbfcad 4658 if (dump_enabled_p ())
78c60e3d 4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4660 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 4661 op_type);
ebfd146a
IR
4662 return false;
4663 }
4664
b690cc0f
RG
4665 scalar_dest = gimple_assign_lhs (stmt);
4666 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4667
7b7b1813
RG
4668 /* Most operations cannot handle bit-precision types without extra
4669 truncations. */
4670 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4671 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4672 /* Exceptions are bitwise binary operations. */
4673 && code != BIT_IOR_EXPR
4674 && code != BIT_XOR_EXPR
4675 && code != BIT_AND_EXPR)
4676 {
73fbfcad 4677 if (dump_enabled_p ())
78c60e3d 4678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4679 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
4680 return false;
4681 }
4682
ebfd146a 4683 op0 = gimple_assign_rhs1 (stmt);
24ee1384 4684 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 4685 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 4686 {
73fbfcad 4687 if (dump_enabled_p ())
78c60e3d 4688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4689 "use not simple.\n");
ebfd146a
IR
4690 return false;
4691 }
b690cc0f
RG
4692 /* If op0 is an external or constant def use a vector type with
4693 the same size as the output vector type. */
4694 if (!vectype)
4695 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
4696 if (vec_stmt)
4697 gcc_assert (vectype);
4698 if (!vectype)
4699 {
73fbfcad 4700 if (dump_enabled_p ())
7d8930a0 4701 {
78c60e3d
SS
4702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4703 "no vectype for scalar type ");
4704 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4705 TREE_TYPE (op0));
e645e942 4706 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
4707 }
4708
4709 return false;
4710 }
b690cc0f
RG
4711
4712 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4713 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4714 if (nunits_out != nunits_in)
4715 return false;
ebfd146a 4716
16949072 4717 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
4718 {
4719 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
4720 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4721 &def, &dt[1]))
ebfd146a 4722 {
73fbfcad 4723 if (dump_enabled_p ())
78c60e3d 4724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4725 "use not simple.\n");
ebfd146a
IR
4726 return false;
4727 }
4728 }
16949072
RG
4729 if (op_type == ternary_op)
4730 {
4731 op2 = gimple_assign_rhs3 (stmt);
24ee1384
IR
4732 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4733 &def, &dt[2]))
16949072 4734 {
73fbfcad 4735 if (dump_enabled_p ())
78c60e3d 4736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4737 "use not simple.\n");
16949072
RG
4738 return false;
4739 }
4740 }
ebfd146a 4741
b690cc0f
RG
4742 if (loop_vinfo)
4743 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4744 else
4745 vf = 1;
4746
4747 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4748 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 4749 case of SLP. */
437f4a00 4750 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
4751 ncopies = 1;
4752 else
4753 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4754
4755 gcc_assert (ncopies >= 1);
4756
9dc3f7de 4757 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
4758 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4759 || code == RROTATE_EXPR)
9dc3f7de 4760 return false;
ebfd146a 4761
ebfd146a 4762 /* Supportable by target? */
00f07b86
RH
4763
4764 vec_mode = TYPE_MODE (vectype);
4765 if (code == MULT_HIGHPART_EXPR)
ebfd146a 4766 {
00f07b86 4767 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 4768 icode = LAST_INSN_CODE;
00f07b86
RH
4769 else
4770 icode = CODE_FOR_nothing;
ebfd146a 4771 }
00f07b86
RH
4772 else
4773 {
4774 optab = optab_for_tree_code (code, vectype, optab_default);
4775 if (!optab)
5deb57cb 4776 {
73fbfcad 4777 if (dump_enabled_p ())
78c60e3d 4778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4779 "no optab.\n");
00f07b86 4780 return false;
5deb57cb 4781 }
00f07b86 4782 icode = (int) optab_handler (optab, vec_mode);
5deb57cb
JJ
4783 }
4784
ebfd146a
IR
4785 if (icode == CODE_FOR_nothing)
4786 {
73fbfcad 4787 if (dump_enabled_p ())
78c60e3d 4788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4789 "op not supported by target.\n");
ebfd146a
IR
4790 /* Check only during analysis. */
4791 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 4792 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 4793 return false;
73fbfcad 4794 if (dump_enabled_p ())
e645e942
TJ
4795 dump_printf_loc (MSG_NOTE, vect_location,
4796 "proceeding using word mode.\n");
383d9c83
IR
4797 }
4798
4a00c761 4799 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
4800 if (!VECTOR_MODE_P (vec_mode)
4801 && !vec_stmt
4802 && vf < vect_min_worthwhile_factor (code))
7d8930a0 4803 {
73fbfcad 4804 if (dump_enabled_p ())
78c60e3d 4805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4806 "not worthwhile without SIMD support.\n");
e34842c6 4807 return false;
7d8930a0 4808 }
ebfd146a 4809
ebfd146a
IR
4810 if (!vec_stmt) /* transformation not required. */
4811 {
4a00c761 4812 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 4813 if (dump_enabled_p ())
78c60e3d 4814 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4815 "=== vectorizable_operation ===\n");
c3e7ee41 4816 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
4817 return true;
4818 }
4819
4820 /** Transform. **/
4821
73fbfcad 4822 if (dump_enabled_p ())
78c60e3d 4823 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4824 "transform binary/unary operation.\n");
383d9c83 4825
ebfd146a 4826 /* Handle def. */
00f07b86 4827 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 4828
ebfd146a
IR
4829 /* In case the vectorization factor (VF) is bigger than the number
4830 of elements that we can fit in a vectype (nunits), we have to generate
4831 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
4832 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4833 from one copy of the vector stmt to the next, in the field
4834 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4835 stages to find the correct vector defs to be used when vectorizing
4836 stmts that use the defs of the current stmt. The example below
4837 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4838 we need to create 4 vectorized stmts):
4839
4840 before vectorization:
4841 RELATED_STMT VEC_STMT
4842 S1: x = memref - -
4843 S2: z = x + 1 - -
4844
4845 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4846 there):
4847 RELATED_STMT VEC_STMT
4848 VS1_0: vx0 = memref0 VS1_1 -
4849 VS1_1: vx1 = memref1 VS1_2 -
4850 VS1_2: vx2 = memref2 VS1_3 -
4851 VS1_3: vx3 = memref3 - -
4852 S1: x = load - VS1_0
4853 S2: z = x + 1 - -
4854
4855 step 2: vectorize stmt S2 (done here):
4856 To vectorize stmt S2 we first need to find the relevant vector
4857 def for the first operand 'x'. This is, as usual, obtained from
4858 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4859 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4860 relevant vector def 'vx0'. Having found 'vx0' we can generate
4861 the vector stmt VS2_0, and as usual, record it in the
4862 STMT_VINFO_VEC_STMT of stmt S2.
4863 When creating the second copy (VS2_1), we obtain the relevant vector
4864 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4865 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4866 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4867 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4868 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4869 chain of stmts and pointers:
4870 RELATED_STMT VEC_STMT
4871 VS1_0: vx0 = memref0 VS1_1 -
4872 VS1_1: vx1 = memref1 VS1_2 -
4873 VS1_2: vx2 = memref2 VS1_3 -
4874 VS1_3: vx3 = memref3 - -
4875 S1: x = load - VS1_0
4876 VS2_0: vz0 = vx0 + v1 VS2_1 -
4877 VS2_1: vz1 = vx1 + v1 VS2_2 -
4878 VS2_2: vz2 = vx2 + v1 VS2_3 -
4879 VS2_3: vz3 = vx3 + v1 - -
4880 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
4881
4882 prev_stmt_info = NULL;
4883 for (j = 0; j < ncopies; j++)
4884 {
4885 /* Handle uses. */
4886 if (j == 0)
4a00c761
JJ
4887 {
4888 if (op_type == binary_op || op_type == ternary_op)
4889 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4890 slp_node, -1);
4891 else
4892 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4893 slp_node, -1);
4894 if (op_type == ternary_op)
36ba4aae 4895 {
9771b263
DN
4896 vec_oprnds2.create (1);
4897 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4898 stmt,
4899 NULL));
36ba4aae 4900 }
4a00c761 4901 }
ebfd146a 4902 else
4a00c761
JJ
4903 {
4904 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4905 if (op_type == ternary_op)
4906 {
9771b263
DN
4907 tree vec_oprnd = vec_oprnds2.pop ();
4908 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4909 vec_oprnd));
4a00c761
JJ
4910 }
4911 }
4912
4913 /* Arguments are ready. Create the new vector stmt. */
9771b263 4914 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 4915 {
4a00c761 4916 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 4917 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 4918 vop2 = ((op_type == ternary_op)
9771b263 4919 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 4920 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
4921 new_temp = make_ssa_name (vec_dest, new_stmt);
4922 gimple_assign_set_lhs (new_stmt, new_temp);
4923 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4924 if (slp_node)
9771b263 4925 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
4926 }
4927
4a00c761
JJ
4928 if (slp_node)
4929 continue;
4930
4931 if (j == 0)
4932 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4933 else
4934 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4935 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
4936 }
4937
9771b263
DN
4938 vec_oprnds0.release ();
4939 vec_oprnds1.release ();
4940 vec_oprnds2.release ();
ebfd146a 4941
ebfd146a
IR
4942 return true;
4943}
4944
c716e67f
XDL
4945/* A helper function to ensure data reference DR's base alignment
4946 for STMT_INFO. */
4947
4948static void
4949ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4950{
4951 if (!dr->aux)
4952 return;
4953
4954 if (((dataref_aux *)dr->aux)->base_misaligned)
4955 {
4956 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4957 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4958
428f0c67
JH
4959 if (decl_in_symtab_p (base_decl))
4960 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4961 else
4962 {
4963 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4964 DECL_USER_ALIGN (base_decl) = 1;
4965 }
c716e67f
XDL
4966 ((dataref_aux *)dr->aux)->base_misaligned = false;
4967 }
4968}
4969
ebfd146a 4970
09dfa495
BM
4971/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4972 reversal of the vector elements. If that is impossible to do,
4973 returns NULL. */
4974
4975static tree
4976perm_mask_for_reverse (tree vectype)
4977{
4978 int i, nunits;
4979 unsigned char *sel;
4980
4981 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4982 sel = XALLOCAVEC (unsigned char, nunits);
4983
4984 for (i = 0; i < nunits; ++i)
4985 sel[i] = nunits - 1 - i;
4986
557be5a8
AL
4987 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4988 return NULL_TREE;
4989 return vect_gen_perm_mask_checked (vectype, sel);
09dfa495
BM
4990}
4991
ebfd146a
IR
4992/* Function vectorizable_store.
4993
b8698a0f
L
4994 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4995 can be vectorized.
4996 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4997 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4998 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4999
5000static bool
5001vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 5002 slp_tree slp_node)
ebfd146a
IR
5003{
5004 tree scalar_dest;
5005 tree data_ref;
5006 tree op;
5007 tree vec_oprnd = NULL_TREE;
5008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5009 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5010 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5011 tree elem_type;
ebfd146a 5012 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5013 struct loop *loop = NULL;
ef4bddc2 5014 machine_mode vec_mode;
ebfd146a
IR
5015 tree dummy;
5016 enum dr_alignment_support alignment_support_scheme;
5017 tree def;
5018 gimple def_stmt;
5019 enum vect_def_type dt;
5020 stmt_vec_info prev_stmt_info = NULL;
5021 tree dataref_ptr = NULL_TREE;
74bf76ed 5022 tree dataref_offset = NULL_TREE;
fef4d2b3 5023 gimple ptr_incr = NULL;
ebfd146a
IR
5024 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5025 int ncopies;
5026 int j;
5027 gimple next_stmt, first_stmt = NULL;
0d0293ac 5028 bool grouped_store = false;
272c6793 5029 bool store_lanes_p = false;
ebfd146a 5030 unsigned int group_size, i;
6e1aa848
DN
5031 vec<tree> dr_chain = vNULL;
5032 vec<tree> oprnds = vNULL;
5033 vec<tree> result_chain = vNULL;
ebfd146a 5034 bool inv_p;
09dfa495
BM
5035 bool negative = false;
5036 tree offset = NULL_TREE;
6e1aa848 5037 vec<tree> vec_oprnds = vNULL;
ebfd146a 5038 bool slp = (slp_node != NULL);
ebfd146a 5039 unsigned int vec_num;
a70d6342 5040 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 5041 tree aggr_type;
a70d6342
IR
5042
5043 if (loop_vinfo)
5044 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
5045
5046 /* Multiple types in SLP are handled by creating the appropriate number of
5047 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5048 case of SLP. */
437f4a00 5049 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5050 ncopies = 1;
5051 else
5052 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5053
5054 gcc_assert (ncopies >= 1);
5055
5056 /* FORNOW. This restriction should be relaxed. */
a70d6342 5057 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 5058 {
73fbfcad 5059 if (dump_enabled_p ())
78c60e3d 5060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5061 "multiple types in nested loop.\n");
ebfd146a
IR
5062 return false;
5063 }
5064
a70d6342 5065 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5066 return false;
5067
8644a673 5068 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5069 return false;
5070
5071 /* Is vectorizable store? */
5072
5073 if (!is_gimple_assign (stmt))
5074 return false;
5075
5076 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5077 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5078 && is_pattern_stmt_p (stmt_info))
5079 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5080 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5081 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5082 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5083 && TREE_CODE (scalar_dest) != COMPONENT_REF
5084 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5085 && TREE_CODE (scalar_dest) != REALPART_EXPR
5086 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5087 return false;
5088
5089 gcc_assert (gimple_assign_single_p (stmt));
5090 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
5091 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5092 &def, &dt))
ebfd146a 5093 {
73fbfcad 5094 if (dump_enabled_p ())
78c60e3d 5095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5096 "use not simple.\n");
ebfd146a
IR
5097 return false;
5098 }
5099
272c6793 5100 elem_type = TREE_TYPE (vectype);
ebfd146a 5101 vec_mode = TYPE_MODE (vectype);
7b7b1813 5102
ebfd146a
IR
5103 /* FORNOW. In some cases can vectorize even if data-type not supported
5104 (e.g. - array initialization with 0). */
947131ba 5105 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5106 return false;
5107
5108 if (!STMT_VINFO_DATA_REF (stmt_info))
5109 return false;
5110
09dfa495
BM
5111 negative =
5112 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5113 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5114 size_zero_node) < 0;
5115 if (negative && ncopies > 1)
a1e53f3f 5116 {
73fbfcad 5117 if (dump_enabled_p ())
78c60e3d 5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f234d260 5119 "multiple types with negative step.\n");
a1e53f3f
L
5120 return false;
5121 }
5122
09dfa495
BM
5123 if (negative)
5124 {
5125 gcc_assert (!grouped_store);
5126 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5127 if (alignment_support_scheme != dr_aligned
5128 && alignment_support_scheme != dr_unaligned_supported)
5129 {
5130 if (dump_enabled_p ())
5131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f234d260 5132 "negative step but alignment required.\n");
09dfa495
BM
5133 return false;
5134 }
f234d260
BM
5135 if (dt != vect_constant_def
5136 && dt != vect_external_def
5137 && !perm_mask_for_reverse (vectype))
09dfa495
BM
5138 {
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f234d260 5141 "negative step and reversing not supported.\n");
09dfa495
BM
5142 return false;
5143 }
5144 }
5145
0d0293ac 5146 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5147 {
0d0293ac 5148 grouped_store = true;
e14c1050 5149 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
5150 if (!slp && !PURE_SLP_STMT (stmt_info))
5151 {
e14c1050 5152 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
5153 if (vect_store_lanes_supported (vectype, group_size))
5154 store_lanes_p = true;
0d0293ac 5155 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
5156 return false;
5157 }
b8698a0f 5158
ebfd146a
IR
5159 if (first_stmt == stmt)
5160 {
5161 /* STMT is the leader of the group. Check the operands of all the
5162 stmts of the group. */
e14c1050 5163 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
5164 while (next_stmt)
5165 {
5166 gcc_assert (gimple_assign_single_p (next_stmt));
5167 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
5168 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5169 &def_stmt, &def, &dt))
ebfd146a 5170 {
73fbfcad 5171 if (dump_enabled_p ())
78c60e3d 5172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5173 "use not simple.\n");
ebfd146a
IR
5174 return false;
5175 }
e14c1050 5176 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5177 }
5178 }
5179 }
5180
5181 if (!vec_stmt) /* transformation not required. */
5182 {
5183 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
5184 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5185 NULL, NULL, NULL);
ebfd146a
IR
5186 return true;
5187 }
5188
5189 /** Transform. **/
5190
c716e67f
XDL
5191 ensure_base_align (stmt_info, dr);
5192
0d0293ac 5193 if (grouped_store)
ebfd146a
IR
5194 {
5195 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5196 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5197
e14c1050 5198 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5199
5200 /* FORNOW */
a70d6342 5201 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5202
5203 /* We vectorize all the stmts of the interleaving group when we
5204 reach the last stmt in the group. */
e14c1050
IR
5205 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5206 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5207 && !slp)
5208 {
5209 *vec_stmt = NULL;
5210 return true;
5211 }
5212
5213 if (slp)
4b5caab7 5214 {
0d0293ac 5215 grouped_store = false;
4b5caab7
IR
5216 /* VEC_NUM is the number of vect stmts to be created for this
5217 group. */
5218 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5219 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 5220 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5221 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5222 }
ebfd146a 5223 else
4b5caab7
IR
5224 /* VEC_NUM is the number of vect stmts to be created for this
5225 group. */
ebfd146a
IR
5226 vec_num = group_size;
5227 }
b8698a0f 5228 else
ebfd146a
IR
5229 {
5230 first_stmt = stmt;
5231 first_dr = dr;
5232 group_size = vec_num = 1;
ebfd146a 5233 }
b8698a0f 5234
73fbfcad 5235 if (dump_enabled_p ())
78c60e3d 5236 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5237 "transform store. ncopies = %d\n", ncopies);
ebfd146a 5238
9771b263
DN
5239 dr_chain.create (group_size);
5240 oprnds.create (group_size);
ebfd146a 5241
720f5239 5242 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 5243 gcc_assert (alignment_support_scheme);
272c6793
RS
5244 /* Targets with store-lane instructions must not require explicit
5245 realignment. */
5246 gcc_assert (!store_lanes_p
5247 || alignment_support_scheme == dr_aligned
5248 || alignment_support_scheme == dr_unaligned_supported);
5249
09dfa495
BM
5250 if (negative)
5251 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5252
272c6793
RS
5253 if (store_lanes_p)
5254 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5255 else
5256 aggr_type = vectype;
ebfd146a
IR
5257
5258 /* In case the vectorization factor (VF) is bigger than the number
5259 of elements that we can fit in a vectype (nunits), we have to generate
5260 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 5261 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
5262 vect_get_vec_def_for_stmt_copy. */
5263
0d0293ac 5264 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
5265
5266 S1: &base + 2 = x2
5267 S2: &base = x0
5268 S3: &base + 1 = x1
5269 S4: &base + 3 = x3
5270
5271 We create vectorized stores starting from base address (the access of the
5272 first stmt in the chain (S2 in the above example), when the last store stmt
5273 of the chain (S4) is reached:
5274
5275 VS1: &base = vx2
5276 VS2: &base + vec_size*1 = vx0
5277 VS3: &base + vec_size*2 = vx1
5278 VS4: &base + vec_size*3 = vx3
5279
5280 Then permutation statements are generated:
5281
3fcc1b55
JJ
5282 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5283 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 5284 ...
b8698a0f 5285
ebfd146a
IR
5286 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5287 (the order of the data-refs in the output of vect_permute_store_chain
5288 corresponds to the order of scalar stmts in the interleaving chain - see
5289 the documentation of vect_permute_store_chain()).
5290
5291 In case of both multiple types and interleaving, above vector stores and
ff802fa1 5292 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 5293 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 5294 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
5295 */
5296
5297 prev_stmt_info = NULL;
5298 for (j = 0; j < ncopies; j++)
5299 {
5300 gimple new_stmt;
ebfd146a
IR
5301
5302 if (j == 0)
5303 {
5304 if (slp)
5305 {
5306 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
5307 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5308 NULL, slp_node, -1);
ebfd146a 5309
9771b263 5310 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
5311 }
5312 else
5313 {
b8698a0f
L
5314 /* For interleaved stores we collect vectorized defs for all the
5315 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5316 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
5317 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5318
0d0293ac 5319 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 5320 OPRNDS are of size 1. */
b8698a0f 5321 next_stmt = first_stmt;
ebfd146a
IR
5322 for (i = 0; i < group_size; i++)
5323 {
b8698a0f
L
5324 /* Since gaps are not supported for interleaved stores,
5325 GROUP_SIZE is the exact number of stmts in the chain.
5326 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5327 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
5328 iteration of the loop will be executed. */
5329 gcc_assert (next_stmt
5330 && gimple_assign_single_p (next_stmt));
5331 op = gimple_assign_rhs1 (next_stmt);
5332
b8698a0f 5333 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 5334 NULL);
9771b263
DN
5335 dr_chain.quick_push (vec_oprnd);
5336 oprnds.quick_push (vec_oprnd);
e14c1050 5337 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5338 }
5339 }
5340
5341 /* We should have caught mismatched types earlier. */
5342 gcc_assert (useless_type_conversion_p (vectype,
5343 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
5344 bool simd_lane_access_p
5345 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5346 if (simd_lane_access_p
5347 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5348 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5349 && integer_zerop (DR_OFFSET (first_dr))
5350 && integer_zerop (DR_INIT (first_dr))
5351 && alias_sets_conflict_p (get_alias_set (aggr_type),
5352 get_alias_set (DR_REF (first_dr))))
5353 {
5354 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5355 dataref_offset = build_int_cst (reference_alias_ptr_type
5356 (DR_REF (first_dr)), 0);
8928eff3 5357 inv_p = false;
74bf76ed
JJ
5358 }
5359 else
5360 dataref_ptr
5361 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5362 simd_lane_access_p ? loop : NULL,
09dfa495 5363 offset, &dummy, gsi, &ptr_incr,
74bf76ed 5364 simd_lane_access_p, &inv_p);
a70d6342 5365 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 5366 }
b8698a0f 5367 else
ebfd146a 5368 {
b8698a0f
L
5369 /* For interleaved stores we created vectorized defs for all the
5370 defs stored in OPRNDS in the previous iteration (previous copy).
5371 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
5372 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5373 next copy.
0d0293ac 5374 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
5375 OPRNDS are of size 1. */
5376 for (i = 0; i < group_size; i++)
5377 {
9771b263 5378 op = oprnds[i];
24ee1384
IR
5379 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5380 &def, &dt);
b8698a0f 5381 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
5382 dr_chain[i] = vec_oprnd;
5383 oprnds[i] = vec_oprnd;
ebfd146a 5384 }
74bf76ed
JJ
5385 if (dataref_offset)
5386 dataref_offset
5387 = int_const_binop (PLUS_EXPR, dataref_offset,
5388 TYPE_SIZE_UNIT (aggr_type));
5389 else
5390 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5391 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
5392 }
5393
272c6793 5394 if (store_lanes_p)
ebfd146a 5395 {
272c6793 5396 tree vec_array;
267d3070 5397
272c6793
RS
5398 /* Combine all the vectors into an array. */
5399 vec_array = create_vector_array (vectype, vec_num);
5400 for (i = 0; i < vec_num; i++)
c2d7ab2a 5401 {
9771b263 5402 vec_oprnd = dr_chain[i];
272c6793 5403 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 5404 }
b8698a0f 5405
272c6793
RS
5406 /* Emit:
5407 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5408 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5409 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5410 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 5411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5412 }
5413 else
5414 {
5415 new_stmt = NULL;
0d0293ac 5416 if (grouped_store)
272c6793 5417 {
b6b9227d
JJ
5418 if (j == 0)
5419 result_chain.create (group_size);
272c6793
RS
5420 /* Permute. */
5421 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5422 &result_chain);
5423 }
c2d7ab2a 5424
272c6793
RS
5425 next_stmt = first_stmt;
5426 for (i = 0; i < vec_num; i++)
5427 {
644ffefd 5428 unsigned align, misalign;
272c6793
RS
5429
5430 if (i > 0)
5431 /* Bump the vector pointer. */
5432 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5433 stmt, NULL_TREE);
5434
5435 if (slp)
9771b263 5436 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
5437 else if (grouped_store)
5438 /* For grouped stores vectorized defs are interleaved in
272c6793 5439 vect_permute_store_chain(). */
9771b263 5440 vec_oprnd = result_chain[i];
272c6793
RS
5441
5442 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
74bf76ed
JJ
5443 dataref_offset
5444 ? dataref_offset
5445 : build_int_cst (reference_alias_ptr_type
5446 (DR_REF (first_dr)), 0));
644ffefd 5447 align = TYPE_ALIGN_UNIT (vectype);
272c6793 5448 if (aligned_access_p (first_dr))
644ffefd 5449 misalign = 0;
272c6793
RS
5450 else if (DR_MISALIGNMENT (first_dr) == -1)
5451 {
5452 TREE_TYPE (data_ref)
5453 = build_aligned_type (TREE_TYPE (data_ref),
5454 TYPE_ALIGN (elem_type));
644ffefd
MJ
5455 align = TYPE_ALIGN_UNIT (elem_type);
5456 misalign = 0;
272c6793
RS
5457 }
5458 else
5459 {
5460 TREE_TYPE (data_ref)
5461 = build_aligned_type (TREE_TYPE (data_ref),
5462 TYPE_ALIGN (elem_type));
644ffefd 5463 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5464 }
74bf76ed
JJ
5465 if (dataref_offset == NULL_TREE)
5466 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5467 misalign);
c2d7ab2a 5468
f234d260
BM
5469 if (negative
5470 && dt != vect_constant_def
5471 && dt != vect_external_def)
09dfa495
BM
5472 {
5473 tree perm_mask = perm_mask_for_reverse (vectype);
5474 tree perm_dest
5475 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5476 vectype);
b731b390 5477 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
5478
5479 /* Generate the permute statement. */
5480 gimple perm_stmt
0d0e4a03
JJ
5481 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5482 vec_oprnd, perm_mask);
09dfa495
BM
5483 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5484
5485 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5486 vec_oprnd = new_temp;
5487 }
5488
272c6793
RS
5489 /* Arguments are ready. Create the new vector stmt. */
5490 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5492
5493 if (slp)
5494 continue;
5495
e14c1050 5496 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
5497 if (!next_stmt)
5498 break;
5499 }
ebfd146a 5500 }
1da0876c
RS
5501 if (!slp)
5502 {
5503 if (j == 0)
5504 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5505 else
5506 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5507 prev_stmt_info = vinfo_for_stmt (new_stmt);
5508 }
ebfd146a
IR
5509 }
5510
9771b263
DN
5511 dr_chain.release ();
5512 oprnds.release ();
5513 result_chain.release ();
5514 vec_oprnds.release ();
ebfd146a
IR
5515
5516 return true;
5517}
5518
557be5a8
AL
5519/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5520 VECTOR_CST mask. No checks are made that the target platform supports the
5521 mask, so callers may wish to test can_vec_perm_p separately, or use
5522 vect_gen_perm_mask_checked. */
a1e53f3f 5523
3fcc1b55 5524tree
557be5a8 5525vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
a1e53f3f 5526{
d2a12ae7 5527 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 5528 int i, nunits;
a1e53f3f 5529
22e4dee7 5530 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 5531
96f9265a
RG
5532 mask_elt_type = lang_hooks.types.type_for_mode
5533 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 5534 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 5535
d2a12ae7 5536 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 5537 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
5538 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5539 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 5540
2635892a 5541 return mask_vec;
a1e53f3f
L
5542}
5543
cf7aa6a3
AL
5544/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5545 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
5546
5547tree
5548vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5549{
5550 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5551 return vect_gen_perm_mask_any (vectype, sel);
5552}
5553
aec7ae7d
JJ
5554/* Given a vector variable X and Y, that was generated for the scalar
5555 STMT, generate instructions to permute the vector elements of X and Y
5556 using permutation mask MASK_VEC, insert them at *GSI and return the
5557 permuted vector variable. */
a1e53f3f
L
5558
5559static tree
aec7ae7d
JJ
5560permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5561 gimple_stmt_iterator *gsi)
a1e53f3f
L
5562{
5563 tree vectype = TREE_TYPE (x);
aec7ae7d 5564 tree perm_dest, data_ref;
a1e53f3f
L
5565 gimple perm_stmt;
5566
acdcd61b 5567 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 5568 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
5569
5570 /* Generate the permute statement. */
0d0e4a03 5571 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
5572 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5573
5574 return data_ref;
5575}
5576
6b916b36
RB
5577/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5578 inserting them on the loops preheader edge. Returns true if we
5579 were successful in doing so (and thus STMT can be moved then),
5580 otherwise returns false. */
5581
5582static bool
5583hoist_defs_of_uses (gimple stmt, struct loop *loop)
5584{
5585 ssa_op_iter i;
5586 tree op;
5587 bool any = false;
5588
5589 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5590 {
5591 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5592 if (!gimple_nop_p (def_stmt)
5593 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5594 {
5595 /* Make sure we don't need to recurse. While we could do
5596 so in simple cases when there are more complex use webs
5597 we don't have an easy way to preserve stmt order to fulfil
5598 dependencies within them. */
5599 tree op2;
5600 ssa_op_iter i2;
d1417442
JJ
5601 if (gimple_code (def_stmt) == GIMPLE_PHI)
5602 return false;
6b916b36
RB
5603 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5604 {
5605 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5606 if (!gimple_nop_p (def_stmt2)
5607 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5608 return false;
5609 }
5610 any = true;
5611 }
5612 }
5613
5614 if (!any)
5615 return true;
5616
5617 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5618 {
5619 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5620 if (!gimple_nop_p (def_stmt)
5621 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5622 {
5623 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5624 gsi_remove (&gsi, false);
5625 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5626 }
5627 }
5628
5629 return true;
5630}
5631
ebfd146a
IR
5632/* vectorizable_load.
5633
b8698a0f
L
5634 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5635 can be vectorized.
5636 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5637 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5638 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5639
5640static bool
5641vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 5642 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
5643{
5644 tree scalar_dest;
5645 tree vec_dest = NULL;
5646 tree data_ref = NULL;
5647 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 5648 stmt_vec_info prev_stmt_info;
ebfd146a 5649 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5650 struct loop *loop = NULL;
ebfd146a 5651 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 5652 bool nested_in_vect_loop = false;
c716e67f 5653 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 5654 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5655 tree elem_type;
ebfd146a 5656 tree new_temp;
ef4bddc2 5657 machine_mode mode;
ebfd146a
IR
5658 gimple new_stmt = NULL;
5659 tree dummy;
5660 enum dr_alignment_support alignment_support_scheme;
5661 tree dataref_ptr = NULL_TREE;
74bf76ed 5662 tree dataref_offset = NULL_TREE;
fef4d2b3 5663 gimple ptr_incr = NULL;
ebfd146a
IR
5664 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5665 int ncopies;
a64b9c26 5666 int i, j, group_size, group_gap;
ebfd146a
IR
5667 tree msq = NULL_TREE, lsq;
5668 tree offset = NULL_TREE;
356bbc4c 5669 tree byte_offset = NULL_TREE;
ebfd146a 5670 tree realignment_token = NULL_TREE;
538dd0b7 5671 gphi *phi = NULL;
6e1aa848 5672 vec<tree> dr_chain = vNULL;
0d0293ac 5673 bool grouped_load = false;
272c6793 5674 bool load_lanes_p = false;
ebfd146a 5675 gimple first_stmt;
ebfd146a 5676 bool inv_p;
319e6439 5677 bool negative = false;
ebfd146a
IR
5678 bool compute_in_loop = false;
5679 struct loop *at_loop;
5680 int vec_num;
5681 bool slp = (slp_node != NULL);
5682 bool slp_perm = false;
5683 enum tree_code code;
a70d6342
IR
5684 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5685 int vf;
272c6793 5686 tree aggr_type;
aec7ae7d
JJ
5687 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5688 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5689 int gather_scale = 1;
5690 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
5691
5692 if (loop_vinfo)
5693 {
5694 loop = LOOP_VINFO_LOOP (loop_vinfo);
5695 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5696 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5697 }
5698 else
3533e503 5699 vf = 1;
ebfd146a
IR
5700
5701 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5702 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 5703 case of SLP. */
437f4a00 5704 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5705 ncopies = 1;
5706 else
5707 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5708
5709 gcc_assert (ncopies >= 1);
5710
5711 /* FORNOW. This restriction should be relaxed. */
5712 if (nested_in_vect_loop && ncopies > 1)
5713 {
73fbfcad 5714 if (dump_enabled_p ())
78c60e3d 5715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5716 "multiple types in nested loop.\n");
ebfd146a
IR
5717 return false;
5718 }
5719
f2556b68
RB
5720 /* Invalidate assumptions made by dependence analysis when vectorization
5721 on the unrolled body effectively re-orders stmts. */
5722 if (ncopies > 1
5723 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5724 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5725 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5726 {
5727 if (dump_enabled_p ())
5728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5729 "cannot perform implicit CSE when unrolling "
5730 "with negative dependence distance\n");
5731 return false;
5732 }
5733
a70d6342 5734 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5735 return false;
5736
8644a673 5737 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5738 return false;
5739
5740 /* Is vectorizable load? */
5741 if (!is_gimple_assign (stmt))
5742 return false;
5743
5744 scalar_dest = gimple_assign_lhs (stmt);
5745 if (TREE_CODE (scalar_dest) != SSA_NAME)
5746 return false;
5747
5748 code = gimple_assign_rhs_code (stmt);
5749 if (code != ARRAY_REF
38000232 5750 && code != BIT_FIELD_REF
ebfd146a 5751 && code != INDIRECT_REF
e9dbe7bb
IR
5752 && code != COMPONENT_REF
5753 && code != IMAGPART_EXPR
70f34814 5754 && code != REALPART_EXPR
42373e0b
RG
5755 && code != MEM_REF
5756 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
5757 return false;
5758
5759 if (!STMT_VINFO_DATA_REF (stmt_info))
5760 return false;
5761
7b7b1813 5762 elem_type = TREE_TYPE (vectype);
947131ba 5763 mode = TYPE_MODE (vectype);
ebfd146a
IR
5764
5765 /* FORNOW. In some cases can vectorize even if data-type not supported
5766 (e.g. - data copies). */
947131ba 5767 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 5768 {
73fbfcad 5769 if (dump_enabled_p ())
78c60e3d 5770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5771 "Aligned load, but unsupported type.\n");
ebfd146a
IR
5772 return false;
5773 }
5774
ebfd146a 5775 /* Check if the load is a part of an interleaving chain. */
0d0293ac 5776 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5777 {
0d0293ac 5778 grouped_load = true;
ebfd146a 5779 /* FORNOW */
aec7ae7d 5780 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 5781
e14c1050 5782 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d5f035ea
RB
5783
5784 /* If this is single-element interleaving with an element distance
5785 that leaves unused vector loads around punt - we at least create
5786 very sub-optimal code in that case (and blow up memory,
5787 see PR65518). */
5788 if (first_stmt == stmt
5789 && !GROUP_NEXT_ELEMENT (stmt_info)
5790 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5791 {
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5794 "single-element interleaving not supported "
5795 "for not adjacent vector loads\n");
5796 return false;
5797 }
5798
b602d918
RS
5799 if (!slp && !PURE_SLP_STMT (stmt_info))
5800 {
e14c1050 5801 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
5802 if (vect_load_lanes_supported (vectype, group_size))
5803 load_lanes_p = true;
0d0293ac 5804 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
5805 return false;
5806 }
f2556b68
RB
5807
5808 /* Invalidate assumptions made by dependence analysis when vectorization
5809 on the unrolled body effectively re-orders stmts. */
5810 if (!PURE_SLP_STMT (stmt_info)
5811 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5812 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5813 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5814 {
5815 if (dump_enabled_p ())
5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5817 "cannot perform implicit CSE when performing "
5818 "group loads with negative dependence distance\n");
5819 return false;
5820 }
96bb56b2
RB
5821
5822 /* Similarly when the stmt is a load that is both part of a SLP
5823 instance and a loop vectorized stmt via the same-dr mechanism
5824 we have to give up. */
5825 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5826 && (STMT_SLP_TYPE (stmt_info)
5827 != STMT_SLP_TYPE (vinfo_for_stmt
5828 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5829 {
5830 if (dump_enabled_p ())
5831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5832 "conflicting SLP types for CSEd load\n");
5833 return false;
5834 }
ebfd146a
IR
5835 }
5836
a1e53f3f 5837
aec7ae7d
JJ
5838 if (STMT_VINFO_GATHER_P (stmt_info))
5839 {
5840 gimple def_stmt;
5841 tree def;
5842 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5843 &gather_off, &gather_scale);
5844 gcc_assert (gather_decl);
24ee1384 5845 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
5846 &def_stmt, &def, &gather_dt,
5847 &gather_off_vectype))
5848 {
73fbfcad 5849 if (dump_enabled_p ())
78c60e3d 5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5851 "gather index use not simple.\n");
aec7ae7d
JJ
5852 return false;
5853 }
5854 }
7d75abc8 5855 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
14ac6aa2 5856 ;
319e6439
RG
5857 else
5858 {
5859 negative = tree_int_cst_compare (nested_in_vect_loop
5860 ? STMT_VINFO_DR_STEP (stmt_info)
5861 : DR_STEP (dr),
5862 size_zero_node) < 0;
5863 if (negative && ncopies > 1)
5864 {
73fbfcad 5865 if (dump_enabled_p ())
78c60e3d 5866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5867 "multiple types with negative step.\n");
319e6439
RG
5868 return false;
5869 }
5870
5871 if (negative)
5872 {
08940f33
RB
5873 if (grouped_load)
5874 {
5875 if (dump_enabled_p ())
5876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
5877 "negative step for group load not supported"
5878 "\n");
08940f33
RB
5879 return false;
5880 }
319e6439
RG
5881 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5882 if (alignment_support_scheme != dr_aligned
5883 && alignment_support_scheme != dr_unaligned_supported)
5884 {
73fbfcad 5885 if (dump_enabled_p ())
78c60e3d 5886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5887 "negative step but alignment required.\n");
319e6439
RG
5888 return false;
5889 }
5890 if (!perm_mask_for_reverse (vectype))
5891 {
73fbfcad 5892 if (dump_enabled_p ())
78c60e3d 5893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
5894 "negative step and reversing not supported."
5895 "\n");
319e6439
RG
5896 return false;
5897 }
5898 }
7d75abc8 5899 }
aec7ae7d 5900
ebfd146a
IR
5901 if (!vec_stmt) /* transformation not required. */
5902 {
5903 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 5904 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
ebfd146a
IR
5905 return true;
5906 }
5907
73fbfcad 5908 if (dump_enabled_p ())
78c60e3d 5909 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5910 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
5911
5912 /** Transform. **/
5913
c716e67f
XDL
5914 ensure_base_align (stmt_info, dr);
5915
aec7ae7d
JJ
5916 if (STMT_VINFO_GATHER_P (stmt_info))
5917 {
5918 tree vec_oprnd0 = NULL_TREE, op;
5919 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5920 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 5921 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
5922 edge pe = loop_preheader_edge (loop);
5923 gimple_seq seq;
5924 basic_block new_bb;
5925 enum { NARROW, NONE, WIDEN } modifier;
5926 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5927
5928 if (nunits == gather_off_nunits)
5929 modifier = NONE;
5930 else if (nunits == gather_off_nunits / 2)
5931 {
5932 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5933 modifier = WIDEN;
5934
5935 for (i = 0; i < gather_off_nunits; ++i)
5936 sel[i] = i | nunits;
5937
557be5a8 5938 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
aec7ae7d
JJ
5939 }
5940 else if (nunits == gather_off_nunits * 2)
5941 {
5942 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5943 modifier = NARROW;
5944
5945 for (i = 0; i < nunits; ++i)
5946 sel[i] = i < gather_off_nunits
5947 ? i : i + nunits - gather_off_nunits;
5948
557be5a8 5949 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
aec7ae7d
JJ
5950 ncopies *= 2;
5951 }
5952 else
5953 gcc_unreachable ();
5954
5955 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5956 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5957 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5958 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5959 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5960 scaletype = TREE_VALUE (arglist);
d3c2fee0 5961 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
5962
5963 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5964
5965 ptr = fold_convert (ptrtype, gather_base);
5966 if (!is_gimple_min_invariant (ptr))
5967 {
5968 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5969 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5970 gcc_assert (!new_bb);
5971 }
5972
5973 /* Currently we support only unconditional gather loads,
5974 so mask should be all ones. */
d3c2fee0
AI
5975 if (TREE_CODE (masktype) == INTEGER_TYPE)
5976 mask = build_int_cst (masktype, -1);
5977 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5978 {
5979 mask = build_int_cst (TREE_TYPE (masktype), -1);
5980 mask = build_vector_from_val (masktype, mask);
03b9e8e4 5981 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 5982 }
aec7ae7d
JJ
5983 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5984 {
5985 REAL_VALUE_TYPE r;
5986 long tmp[6];
5987 for (j = 0; j < 6; ++j)
5988 tmp[j] = -1;
5989 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5990 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 5991 mask = build_vector_from_val (masktype, mask);
03b9e8e4 5992 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
5993 }
5994 else
5995 gcc_unreachable ();
aec7ae7d
JJ
5996
5997 scale = build_int_cst (scaletype, gather_scale);
5998
d3c2fee0
AI
5999 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6000 merge = build_int_cst (TREE_TYPE (rettype), 0);
6001 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6002 {
6003 REAL_VALUE_TYPE r;
6004 long tmp[6];
6005 for (j = 0; j < 6; ++j)
6006 tmp[j] = 0;
6007 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6008 merge = build_real (TREE_TYPE (rettype), r);
6009 }
6010 else
6011 gcc_unreachable ();
6012 merge = build_vector_from_val (rettype, merge);
6013 merge = vect_init_vector (stmt, merge, rettype, NULL);
6014
aec7ae7d
JJ
6015 prev_stmt_info = NULL;
6016 for (j = 0; j < ncopies; ++j)
6017 {
6018 if (modifier == WIDEN && (j & 1))
6019 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6020 perm_mask, stmt, gsi);
6021 else if (j == 0)
6022 op = vec_oprnd0
6023 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6024 else
6025 op = vec_oprnd0
6026 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6027
6028 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6029 {
6030 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6031 == TYPE_VECTOR_SUBPARTS (idxtype));
6032 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
b731b390 6033 var = make_ssa_name (var);
aec7ae7d
JJ
6034 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6035 new_stmt
0d0e4a03 6036 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6038 op = var;
6039 }
6040
6041 new_stmt
d3c2fee0 6042 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
6043
6044 if (!useless_type_conversion_p (vectype, rettype))
6045 {
6046 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6047 == TYPE_VECTOR_SUBPARTS (rettype));
6048 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
6049 op = make_ssa_name (var, new_stmt);
6050 gimple_call_set_lhs (new_stmt, op);
6051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 6052 var = make_ssa_name (vec_dest);
aec7ae7d
JJ
6053 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6054 new_stmt
0d0e4a03 6055 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6056 }
6057 else
6058 {
6059 var = make_ssa_name (vec_dest, new_stmt);
6060 gimple_call_set_lhs (new_stmt, var);
6061 }
6062
6063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6064
6065 if (modifier == NARROW)
6066 {
6067 if ((j & 1) == 0)
6068 {
6069 prev_res = var;
6070 continue;
6071 }
6072 var = permute_vec_elements (prev_res, var,
6073 perm_mask, stmt, gsi);
6074 new_stmt = SSA_NAME_DEF_STMT (var);
6075 }
6076
6077 if (prev_stmt_info == NULL)
6078 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6079 else
6080 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6081 prev_stmt_info = vinfo_for_stmt (new_stmt);
6082 }
6083 return true;
6084 }
7d75abc8
MM
6085 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6086 {
6087 gimple_stmt_iterator incr_gsi;
6088 bool insert_after;
6089 gimple incr;
6090 tree offvar;
7d75abc8
MM
6091 tree ivstep;
6092 tree running_off;
9771b263 6093 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 6094 gimple_seq stmts = NULL;
14ac6aa2
RB
6095 tree stride_base, stride_step, alias_off;
6096
6097 gcc_assert (!nested_in_vect_loop);
7d75abc8 6098
14ac6aa2
RB
6099 stride_base
6100 = fold_build_pointer_plus
6101 (unshare_expr (DR_BASE_ADDRESS (dr)),
6102 size_binop (PLUS_EXPR,
6103 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 6104 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 6105 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
6106
6107 /* For a load with loop-invariant (but other than power-of-2)
6108 stride (i.e. not a grouped access) like so:
6109
6110 for (i = 0; i < n; i += stride)
6111 ... = array[i];
6112
6113 we generate a new induction variable and new accesses to
6114 form a new vector (or vectors, depending on ncopies):
6115
6116 for (j = 0; ; j += VF*stride)
6117 tmp1 = array[j];
6118 tmp2 = array[j + stride];
6119 ...
6120 vectemp = {tmp1, tmp2, ...}
6121 */
6122
6123 ivstep = stride_step;
6124 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6125 build_int_cst (TREE_TYPE (ivstep), vf));
6126
6127 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6128
6129 create_iv (stride_base, ivstep, NULL,
6130 loop, &incr_gsi, insert_after,
6131 &offvar, NULL);
6132 incr = gsi_stmt (incr_gsi);
6133 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6134
6135 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6136 if (stmts)
6137 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6138
6139 prev_stmt_info = NULL;
6140 running_off = offvar;
14ac6aa2 6141 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
7d75abc8
MM
6142 for (j = 0; j < ncopies; j++)
6143 {
6144 tree vec_inv;
6145
9771b263 6146 vec_alloc (v, nunits);
7d75abc8
MM
6147 for (i = 0; i < nunits; i++)
6148 {
6149 tree newref, newoff;
6150 gimple incr;
14ac6aa2
RB
6151 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6152 running_off, alias_off);
7d75abc8
MM
6153
6154 newref = force_gimple_operand_gsi (gsi, newref, true,
6155 NULL_TREE, true,
6156 GSI_SAME_STMT);
6157 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
b731b390 6158 newoff = copy_ssa_name (running_off);
0d0e4a03
JJ
6159 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6160 running_off, stride_step);
7d75abc8
MM
6161 vect_finish_stmt_generation (stmt, incr, gsi);
6162
6163 running_off = newoff;
6164 }
6165
6166 vec_inv = build_constructor (vectype, v);
6167 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6168 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7d75abc8
MM
6169
6170 if (j == 0)
6171 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6172 else
6173 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6174 prev_stmt_info = vinfo_for_stmt (new_stmt);
6175 }
6176 return true;
6177 }
aec7ae7d 6178
0d0293ac 6179 if (grouped_load)
ebfd146a 6180 {
e14c1050 6181 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 6182 if (slp
01d8bf07 6183 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
6184 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6185 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 6186
ebfd146a 6187 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
6188 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6189 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6190 ??? But we can only do so if there is exactly one
6191 as we have no way to get at the rest. Leave the CSE
6192 opportunity alone.
6193 ??? With the group load eventually participating
6194 in multiple different permutations (having multiple
6195 slp nodes which refer to the same group) the CSE
6196 is even wrong code. See PR56270. */
6197 && !slp)
ebfd146a
IR
6198 {
6199 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6200 return true;
6201 }
6202 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6203 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
6204
6205 /* VEC_NUM is the number of vect stmts to be created for this group. */
6206 if (slp)
6207 {
0d0293ac 6208 grouped_load = false;
ebfd146a 6209 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 6210 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 6211 slp_perm = true;
a64b9c26 6212 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 6213 }
ebfd146a 6214 else
a64b9c26
RB
6215 {
6216 vec_num = group_size;
6217 group_gap = 0;
6218 }
ebfd146a
IR
6219 }
6220 else
6221 {
6222 first_stmt = stmt;
6223 first_dr = dr;
6224 group_size = vec_num = 1;
a64b9c26 6225 group_gap = 0;
ebfd146a
IR
6226 }
6227
720f5239 6228 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6229 gcc_assert (alignment_support_scheme);
272c6793
RS
6230 /* Targets with load-lane instructions must not require explicit
6231 realignment. */
6232 gcc_assert (!load_lanes_p
6233 || alignment_support_scheme == dr_aligned
6234 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
6235
6236 /* In case the vectorization factor (VF) is bigger than the number
6237 of elements that we can fit in a vectype (nunits), we have to generate
6238 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 6239 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 6240 from one copy of the vector stmt to the next, in the field
ff802fa1 6241 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 6242 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
6243 stmts that use the defs of the current stmt. The example below
6244 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6245 need to create 4 vectorized stmts):
ebfd146a
IR
6246
6247 before vectorization:
6248 RELATED_STMT VEC_STMT
6249 S1: x = memref - -
6250 S2: z = x + 1 - -
6251
6252 step 1: vectorize stmt S1:
6253 We first create the vector stmt VS1_0, and, as usual, record a
6254 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6255 Next, we create the vector stmt VS1_1, and record a pointer to
6256 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 6257 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
6258 stmts and pointers:
6259 RELATED_STMT VEC_STMT
6260 VS1_0: vx0 = memref0 VS1_1 -
6261 VS1_1: vx1 = memref1 VS1_2 -
6262 VS1_2: vx2 = memref2 VS1_3 -
6263 VS1_3: vx3 = memref3 - -
6264 S1: x = load - VS1_0
6265 S2: z = x + 1 - -
6266
b8698a0f
L
6267 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6268 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
6269 stmt S2. */
6270
0d0293ac 6271 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6272
6273 S1: x2 = &base + 2
6274 S2: x0 = &base
6275 S3: x1 = &base + 1
6276 S4: x3 = &base + 3
6277
b8698a0f 6278 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
6279 starting from the access of the first stmt of the chain:
6280
6281 VS1: vx0 = &base
6282 VS2: vx1 = &base + vec_size*1
6283 VS3: vx3 = &base + vec_size*2
6284 VS4: vx4 = &base + vec_size*3
6285
6286 Then permutation statements are generated:
6287
e2c83630
RH
6288 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6289 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
6290 ...
6291
6292 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6293 (the order of the data-refs in the output of vect_permute_load_chain
6294 corresponds to the order of scalar stmts in the interleaving chain - see
6295 the documentation of vect_permute_load_chain()).
6296 The generation of permutation stmts and recording them in
0d0293ac 6297 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 6298
b8698a0f 6299 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
6300 permutation stmts above are created for every copy. The result vector
6301 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6302 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
6303
6304 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6305 on a target that supports unaligned accesses (dr_unaligned_supported)
6306 we generate the following code:
6307 p = initial_addr;
6308 indx = 0;
6309 loop {
6310 p = p + indx * vectype_size;
6311 vec_dest = *(p);
6312 indx = indx + 1;
6313 }
6314
6315 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 6316 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
6317 then generate the following code, in which the data in each iteration is
6318 obtained by two vector loads, one from the previous iteration, and one
6319 from the current iteration:
6320 p1 = initial_addr;
6321 msq_init = *(floor(p1))
6322 p2 = initial_addr + VS - 1;
6323 realignment_token = call target_builtin;
6324 indx = 0;
6325 loop {
6326 p2 = p2 + indx * vectype_size
6327 lsq = *(floor(p2))
6328 vec_dest = realign_load (msq, lsq, realignment_token)
6329 indx = indx + 1;
6330 msq = lsq;
6331 } */
6332
6333 /* If the misalignment remains the same throughout the execution of the
6334 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 6335 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
6336 This can only occur when vectorizing memory accesses in the inner-loop
6337 nested within an outer-loop that is being vectorized. */
6338
d1e4b493 6339 if (nested_in_vect_loop
211bea38 6340 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
6341 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6342 {
6343 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6344 compute_in_loop = true;
6345 }
6346
6347 if ((alignment_support_scheme == dr_explicit_realign_optimized
6348 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 6349 && !compute_in_loop)
ebfd146a
IR
6350 {
6351 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6352 alignment_support_scheme, NULL_TREE,
6353 &at_loop);
6354 if (alignment_support_scheme == dr_explicit_realign_optimized)
6355 {
538dd0b7 6356 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
6357 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6358 size_one_node);
ebfd146a
IR
6359 }
6360 }
6361 else
6362 at_loop = loop;
6363
a1e53f3f
L
6364 if (negative)
6365 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6366
272c6793
RS
6367 if (load_lanes_p)
6368 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6369 else
6370 aggr_type = vectype;
6371
ebfd146a
IR
6372 prev_stmt_info = NULL;
6373 for (j = 0; j < ncopies; j++)
b8698a0f 6374 {
272c6793 6375 /* 1. Create the vector or array pointer update chain. */
ebfd146a 6376 if (j == 0)
74bf76ed
JJ
6377 {
6378 bool simd_lane_access_p
6379 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6380 if (simd_lane_access_p
6381 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6382 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6383 && integer_zerop (DR_OFFSET (first_dr))
6384 && integer_zerop (DR_INIT (first_dr))
6385 && alias_sets_conflict_p (get_alias_set (aggr_type),
6386 get_alias_set (DR_REF (first_dr)))
6387 && (alignment_support_scheme == dr_aligned
6388 || alignment_support_scheme == dr_unaligned_supported))
6389 {
6390 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6391 dataref_offset = build_int_cst (reference_alias_ptr_type
6392 (DR_REF (first_dr)), 0);
8928eff3 6393 inv_p = false;
74bf76ed
JJ
6394 }
6395 else
6396 dataref_ptr
6397 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6398 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
6399 simd_lane_access_p, &inv_p,
6400 byte_offset);
74bf76ed
JJ
6401 }
6402 else if (dataref_offset)
6403 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6404 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6405 else
272c6793
RS
6406 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6407 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6408
0d0293ac 6409 if (grouped_load || slp_perm)
9771b263 6410 dr_chain.create (vec_num);
5ce1ee7f 6411
272c6793 6412 if (load_lanes_p)
ebfd146a 6413 {
272c6793
RS
6414 tree vec_array;
6415
6416 vec_array = create_vector_array (vectype, vec_num);
6417
6418 /* Emit:
6419 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6420 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6421 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6422 gimple_call_set_lhs (new_stmt, vec_array);
6423 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 6424
272c6793
RS
6425 /* Extract each vector into an SSA_NAME. */
6426 for (i = 0; i < vec_num; i++)
ebfd146a 6427 {
272c6793
RS
6428 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6429 vec_array, i);
9771b263 6430 dr_chain.quick_push (new_temp);
272c6793
RS
6431 }
6432
6433 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 6434 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
6435 }
6436 else
6437 {
6438 for (i = 0; i < vec_num; i++)
6439 {
6440 if (i > 0)
6441 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6442 stmt, NULL_TREE);
6443
6444 /* 2. Create the vector-load in the loop. */
6445 switch (alignment_support_scheme)
6446 {
6447 case dr_aligned:
6448 case dr_unaligned_supported:
be1ac4ec 6449 {
644ffefd
MJ
6450 unsigned int align, misalign;
6451
272c6793
RS
6452 data_ref
6453 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
6454 dataref_offset
6455 ? dataref_offset
6456 : build_int_cst (reference_alias_ptr_type
6457 (DR_REF (first_dr)), 0));
644ffefd 6458 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
6459 if (alignment_support_scheme == dr_aligned)
6460 {
6461 gcc_assert (aligned_access_p (first_dr));
644ffefd 6462 misalign = 0;
272c6793
RS
6463 }
6464 else if (DR_MISALIGNMENT (first_dr) == -1)
6465 {
6466 TREE_TYPE (data_ref)
6467 = build_aligned_type (TREE_TYPE (data_ref),
6468 TYPE_ALIGN (elem_type));
644ffefd
MJ
6469 align = TYPE_ALIGN_UNIT (elem_type);
6470 misalign = 0;
272c6793
RS
6471 }
6472 else
6473 {
6474 TREE_TYPE (data_ref)
6475 = build_aligned_type (TREE_TYPE (data_ref),
6476 TYPE_ALIGN (elem_type));
644ffefd 6477 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6478 }
74bf76ed
JJ
6479 if (dataref_offset == NULL_TREE)
6480 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6481 align, misalign);
272c6793 6482 break;
be1ac4ec 6483 }
272c6793 6484 case dr_explicit_realign:
267d3070 6485 {
272c6793 6486 tree ptr, bump;
272c6793 6487
d88981fc 6488 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
6489
6490 if (compute_in_loop)
6491 msq = vect_setup_realignment (first_stmt, gsi,
6492 &realignment_token,
6493 dr_explicit_realign,
6494 dataref_ptr, NULL);
6495
b731b390 6496 ptr = copy_ssa_name (dataref_ptr);
0d0e4a03
JJ
6497 new_stmt = gimple_build_assign
6498 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
6499 build_int_cst
6500 (TREE_TYPE (dataref_ptr),
6501 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
6502 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6503 data_ref
6504 = build2 (MEM_REF, vectype, ptr,
6505 build_int_cst (reference_alias_ptr_type
6506 (DR_REF (first_dr)), 0));
6507 vec_dest = vect_create_destination_var (scalar_dest,
6508 vectype);
6509 new_stmt = gimple_build_assign (vec_dest, data_ref);
6510 new_temp = make_ssa_name (vec_dest, new_stmt);
6511 gimple_assign_set_lhs (new_stmt, new_temp);
6512 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6513 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6514 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6515 msq = new_temp;
6516
d88981fc 6517 bump = size_binop (MULT_EXPR, vs,
7b7b1813 6518 TYPE_SIZE_UNIT (elem_type));
d88981fc 6519 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 6520 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
6521 new_stmt = gimple_build_assign
6522 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793
RS
6523 build_int_cst
6524 (TREE_TYPE (ptr),
6525 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 6526 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
6527 gimple_assign_set_lhs (new_stmt, ptr);
6528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6529 data_ref
6530 = build2 (MEM_REF, vectype, ptr,
6531 build_int_cst (reference_alias_ptr_type
6532 (DR_REF (first_dr)), 0));
6533 break;
267d3070 6534 }
272c6793 6535 case dr_explicit_realign_optimized:
b731b390 6536 new_temp = copy_ssa_name (dataref_ptr);
0d0e4a03
JJ
6537 new_stmt = gimple_build_assign
6538 (new_temp, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
6539 build_int_cst
6540 (TREE_TYPE (dataref_ptr),
6541 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
6542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6543 data_ref
6544 = build2 (MEM_REF, vectype, new_temp,
6545 build_int_cst (reference_alias_ptr_type
6546 (DR_REF (first_dr)), 0));
6547 break;
6548 default:
6549 gcc_unreachable ();
6550 }
ebfd146a 6551 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 6552 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
6553 new_temp = make_ssa_name (vec_dest, new_stmt);
6554 gimple_assign_set_lhs (new_stmt, new_temp);
6555 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6556
272c6793
RS
6557 /* 3. Handle explicit realignment if necessary/supported.
6558 Create in loop:
6559 vec_dest = realign_load (msq, lsq, realignment_token) */
6560 if (alignment_support_scheme == dr_explicit_realign_optimized
6561 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 6562 {
272c6793
RS
6563 lsq = gimple_assign_lhs (new_stmt);
6564 if (!realignment_token)
6565 realignment_token = dataref_ptr;
6566 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
6567 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6568 msq, lsq, realignment_token);
272c6793
RS
6569 new_temp = make_ssa_name (vec_dest, new_stmt);
6570 gimple_assign_set_lhs (new_stmt, new_temp);
6571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6572
6573 if (alignment_support_scheme == dr_explicit_realign_optimized)
6574 {
6575 gcc_assert (phi);
6576 if (i == vec_num - 1 && j == ncopies - 1)
6577 add_phi_arg (phi, lsq,
6578 loop_latch_edge (containing_loop),
9e227d60 6579 UNKNOWN_LOCATION);
272c6793
RS
6580 msq = lsq;
6581 }
ebfd146a 6582 }
ebfd146a 6583
59fd17e3
RB
6584 /* 4. Handle invariant-load. */
6585 if (inv_p && !bb_vinfo)
6586 {
59fd17e3 6587 gcc_assert (!grouped_load);
d1417442
JJ
6588 /* If we have versioned for aliasing or the loop doesn't
6589 have any data dependencies that would preclude this,
6590 then we are sure this is a loop invariant load and
6591 thus we can insert it on the preheader edge. */
6592 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6593 && !nested_in_vect_loop
6b916b36 6594 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
6595 {
6596 if (dump_enabled_p ())
6597 {
6598 dump_printf_loc (MSG_NOTE, vect_location,
6599 "hoisting out of the vectorized "
6600 "loop: ");
6601 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 6602 }
b731b390 6603 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
6604 gsi_insert_on_edge_immediate
6605 (loop_preheader_edge (loop),
6606 gimple_build_assign (tem,
6607 unshare_expr
6608 (gimple_assign_rhs1 (stmt))));
6609 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6610 }
6611 else
6612 {
6613 gimple_stmt_iterator gsi2 = *gsi;
6614 gsi_next (&gsi2);
6615 new_temp = vect_init_vector (stmt, scalar_dest,
6616 vectype, &gsi2);
6617 }
59fd17e3 6618 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0
RB
6619 set_vinfo_for_stmt (new_stmt,
6620 new_stmt_vec_info (new_stmt, loop_vinfo,
6621 bb_vinfo));
59fd17e3
RB
6622 }
6623
272c6793
RS
6624 if (negative)
6625 {
aec7ae7d
JJ
6626 tree perm_mask = perm_mask_for_reverse (vectype);
6627 new_temp = permute_vec_elements (new_temp, new_temp,
6628 perm_mask, stmt, gsi);
ebfd146a
IR
6629 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6630 }
267d3070 6631
272c6793 6632 /* Collect vector loads and later create their permutation in
0d0293ac
MM
6633 vect_transform_grouped_load (). */
6634 if (grouped_load || slp_perm)
9771b263 6635 dr_chain.quick_push (new_temp);
267d3070 6636
272c6793
RS
6637 /* Store vector loads in the corresponding SLP_NODE. */
6638 if (slp && !slp_perm)
9771b263 6639 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 6640 }
a64b9c26
RB
6641 /* Bump the vector pointer to account for a gap. */
6642 if (slp && group_gap != 0)
6643 {
6644 tree bump = size_binop (MULT_EXPR,
6645 TYPE_SIZE_UNIT (elem_type),
6646 size_int (group_gap));
6647 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6648 stmt, bump);
6649 }
ebfd146a
IR
6650 }
6651
6652 if (slp && !slp_perm)
6653 continue;
6654
6655 if (slp_perm)
6656 {
01d8bf07 6657 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
ebfd146a
IR
6658 slp_node_instance, false))
6659 {
9771b263 6660 dr_chain.release ();
ebfd146a
IR
6661 return false;
6662 }
6663 }
6664 else
6665 {
0d0293ac 6666 if (grouped_load)
ebfd146a 6667 {
272c6793 6668 if (!load_lanes_p)
0d0293ac 6669 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 6670 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
6671 }
6672 else
6673 {
6674 if (j == 0)
6675 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6676 else
6677 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6678 prev_stmt_info = vinfo_for_stmt (new_stmt);
6679 }
6680 }
9771b263 6681 dr_chain.release ();
ebfd146a
IR
6682 }
6683
ebfd146a
IR
6684 return true;
6685}
6686
6687/* Function vect_is_simple_cond.
b8698a0f 6688
ebfd146a
IR
6689 Input:
6690 LOOP - the loop that is being vectorized.
6691 COND - Condition that is checked for simple use.
6692
e9e1d143
RG
6693 Output:
6694 *COMP_VECTYPE - the vector type for the comparison.
6695
ebfd146a
IR
6696 Returns whether a COND can be vectorized. Checks whether
6697 condition operands are supportable using vec_is_simple_use. */
6698
87aab9b2 6699static bool
24ee1384
IR
6700vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6701 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
6702{
6703 tree lhs, rhs;
6704 tree def;
6705 enum vect_def_type dt;
e9e1d143 6706 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
6707
6708 if (!COMPARISON_CLASS_P (cond))
6709 return false;
6710
6711 lhs = TREE_OPERAND (cond, 0);
6712 rhs = TREE_OPERAND (cond, 1);
6713
6714 if (TREE_CODE (lhs) == SSA_NAME)
6715 {
6716 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
6717 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6718 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
6719 return false;
6720 }
6721 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6722 && TREE_CODE (lhs) != FIXED_CST)
6723 return false;
6724
6725 if (TREE_CODE (rhs) == SSA_NAME)
6726 {
6727 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
6728 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6729 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
6730 return false;
6731 }
f7e531cf 6732 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
6733 && TREE_CODE (rhs) != FIXED_CST)
6734 return false;
6735
e9e1d143 6736 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
6737 return true;
6738}
6739
6740/* vectorizable_condition.
6741
b8698a0f
L
6742 Check if STMT is conditional modify expression that can be vectorized.
6743 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6744 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
6745 at GSI.
6746
6747 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6748 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6749 else caluse if it is 2).
ebfd146a
IR
6750
6751 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6752
4bbe8262 6753bool
ebfd146a 6754vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
6755 gimple *vec_stmt, tree reduc_def, int reduc_index,
6756 slp_tree slp_node)
ebfd146a
IR
6757{
6758 tree scalar_dest = NULL_TREE;
6759 tree vec_dest = NULL_TREE;
ebfd146a
IR
6760 tree cond_expr, then_clause, else_clause;
6761 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6762 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 6763 tree comp_vectype = NULL_TREE;
ff802fa1
IR
6764 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6765 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
6766 tree vec_compare, vec_cond_expr;
6767 tree new_temp;
6768 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 6769 tree def;
a855b1b1 6770 enum vect_def_type dt, dts[4];
ebfd146a 6771 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 6772 int ncopies;
ebfd146a 6773 enum tree_code code;
a855b1b1 6774 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
6775 int i, j;
6776 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
6777 vec<tree> vec_oprnds0 = vNULL;
6778 vec<tree> vec_oprnds1 = vNULL;
6779 vec<tree> vec_oprnds2 = vNULL;
6780 vec<tree> vec_oprnds3 = vNULL;
74946978 6781 tree vec_cmp_type;
b8698a0f 6782
f7e531cf
IR
6783 if (slp_node || PURE_SLP_STMT (stmt_info))
6784 ncopies = 1;
6785 else
6786 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 6787
ebfd146a 6788 gcc_assert (ncopies >= 1);
a855b1b1 6789 if (reduc_index && ncopies > 1)
ebfd146a
IR
6790 return false; /* FORNOW */
6791
f7e531cf
IR
6792 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6793 return false;
6794
6795 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6796 return false;
6797
4bbe8262
IR
6798 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6799 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6800 && reduc_def))
ebfd146a
IR
6801 return false;
6802
ebfd146a 6803 /* FORNOW: not yet supported. */
b8698a0f 6804 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 6805 {
73fbfcad 6806 if (dump_enabled_p ())
78c60e3d 6807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6808 "value used after loop.\n");
ebfd146a
IR
6809 return false;
6810 }
6811
6812 /* Is vectorizable conditional operation? */
6813 if (!is_gimple_assign (stmt))
6814 return false;
6815
6816 code = gimple_assign_rhs_code (stmt);
6817
6818 if (code != COND_EXPR)
6819 return false;
6820
4e71066d
RG
6821 cond_expr = gimple_assign_rhs1 (stmt);
6822 then_clause = gimple_assign_rhs2 (stmt);
6823 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 6824
24ee1384
IR
6825 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6826 &comp_vectype)
e9e1d143 6827 || !comp_vectype)
ebfd146a
IR
6828 return false;
6829
6830 if (TREE_CODE (then_clause) == SSA_NAME)
6831 {
6832 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 6833 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
6834 &then_def_stmt, &def, &dt))
6835 return false;
6836 }
b8698a0f 6837 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
6838 && TREE_CODE (then_clause) != REAL_CST
6839 && TREE_CODE (then_clause) != FIXED_CST)
6840 return false;
6841
6842 if (TREE_CODE (else_clause) == SSA_NAME)
6843 {
6844 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 6845 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
6846 &else_def_stmt, &def, &dt))
6847 return false;
6848 }
b8698a0f 6849 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
6850 && TREE_CODE (else_clause) != REAL_CST
6851 && TREE_CODE (else_clause) != FIXED_CST)
6852 return false;
6853
74946978
MP
6854 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6855 /* The result of a vector comparison should be signed type. */
6856 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6857 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6858 if (vec_cmp_type == NULL_TREE)
6859 return false;
784fb9b3 6860
b8698a0f 6861 if (!vec_stmt)
ebfd146a
IR
6862 {
6863 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 6864 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
6865 }
6866
f7e531cf
IR
6867 /* Transform. */
6868
6869 if (!slp_node)
6870 {
9771b263
DN
6871 vec_oprnds0.create (1);
6872 vec_oprnds1.create (1);
6873 vec_oprnds2.create (1);
6874 vec_oprnds3.create (1);
f7e531cf 6875 }
ebfd146a
IR
6876
6877 /* Handle def. */
6878 scalar_dest = gimple_assign_lhs (stmt);
6879 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6880
6881 /* Handle cond expr. */
a855b1b1
MM
6882 for (j = 0; j < ncopies; j++)
6883 {
538dd0b7 6884 gassign *new_stmt = NULL;
a855b1b1
MM
6885 if (j == 0)
6886 {
f7e531cf
IR
6887 if (slp_node)
6888 {
00f96dc9
TS
6889 auto_vec<tree, 4> ops;
6890 auto_vec<vec<tree>, 4> vec_defs;
9771b263 6891
9771b263
DN
6892 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6893 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6894 ops.safe_push (then_clause);
6895 ops.safe_push (else_clause);
f7e531cf 6896 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
37b5ec8f
JJ
6897 vec_oprnds3 = vec_defs.pop ();
6898 vec_oprnds2 = vec_defs.pop ();
6899 vec_oprnds1 = vec_defs.pop ();
6900 vec_oprnds0 = vec_defs.pop ();
f7e531cf 6901
9771b263
DN
6902 ops.release ();
6903 vec_defs.release ();
f7e531cf
IR
6904 }
6905 else
6906 {
6907 gimple gtemp;
6908 vec_cond_lhs =
a855b1b1
MM
6909 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6910 stmt, NULL);
24ee1384
IR
6911 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6912 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
6913
6914 vec_cond_rhs =
6915 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6916 stmt, NULL);
24ee1384
IR
6917 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6918 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
6919 if (reduc_index == 1)
6920 vec_then_clause = reduc_def;
6921 else
6922 {
6923 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6924 stmt, NULL);
24ee1384 6925 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
6926 NULL, &gtemp, &def, &dts[2]);
6927 }
6928 if (reduc_index == 2)
6929 vec_else_clause = reduc_def;
6930 else
6931 {
6932 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 6933 stmt, NULL);
24ee1384 6934 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 6935 NULL, &gtemp, &def, &dts[3]);
f7e531cf 6936 }
a855b1b1
MM
6937 }
6938 }
6939 else
6940 {
f7e531cf 6941 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 6942 vec_oprnds0.pop ());
f7e531cf 6943 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 6944 vec_oprnds1.pop ());
a855b1b1 6945 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 6946 vec_oprnds2.pop ());
a855b1b1 6947 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 6948 vec_oprnds3.pop ());
f7e531cf
IR
6949 }
6950
6951 if (!slp_node)
6952 {
9771b263
DN
6953 vec_oprnds0.quick_push (vec_cond_lhs);
6954 vec_oprnds1.quick_push (vec_cond_rhs);
6955 vec_oprnds2.quick_push (vec_then_clause);
6956 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
6957 }
6958
9dc3f7de 6959 /* Arguments are ready. Create the new vector stmt. */
9771b263 6960 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 6961 {
9771b263
DN
6962 vec_cond_rhs = vec_oprnds1[i];
6963 vec_then_clause = vec_oprnds2[i];
6964 vec_else_clause = vec_oprnds3[i];
a855b1b1 6965
784fb9b3
JJ
6966 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6967 vec_cond_lhs, vec_cond_rhs);
f7e531cf
IR
6968 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6969 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 6970
f7e531cf
IR
6971 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6972 new_temp = make_ssa_name (vec_dest, new_stmt);
6973 gimple_assign_set_lhs (new_stmt, new_temp);
6974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6975 if (slp_node)
9771b263 6976 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
6977 }
6978
6979 if (slp_node)
6980 continue;
6981
6982 if (j == 0)
6983 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6984 else
6985 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6986
6987 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 6988 }
b8698a0f 6989
9771b263
DN
6990 vec_oprnds0.release ();
6991 vec_oprnds1.release ();
6992 vec_oprnds2.release ();
6993 vec_oprnds3.release ();
f7e531cf 6994
ebfd146a
IR
6995 return true;
6996}
6997
6998
8644a673 6999/* Make sure the statement is vectorizable. */
ebfd146a
IR
7000
7001bool
a70d6342 7002vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 7003{
8644a673 7004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 7005 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 7006 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 7007 bool ok;
a70d6342 7008 tree scalar_type, vectype;
363477c0
JJ
7009 gimple pattern_stmt;
7010 gimple_seq pattern_def_seq;
ebfd146a 7011
73fbfcad 7012 if (dump_enabled_p ())
ebfd146a 7013 {
78c60e3d
SS
7014 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7015 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 7016 }
ebfd146a 7017
1825a1f3 7018 if (gimple_has_volatile_ops (stmt))
b8698a0f 7019 {
73fbfcad 7020 if (dump_enabled_p ())
78c60e3d 7021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7022 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
7023
7024 return false;
7025 }
b8698a0f
L
7026
7027 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
7028 to include:
7029 - the COND_EXPR which is the loop exit condition
7030 - any LABEL_EXPRs in the loop
b8698a0f 7031 - computations that are used only for array indexing or loop control.
8644a673 7032 In basic blocks we only analyze statements that are a part of some SLP
83197f37 7033 instance, therefore, all the statements are relevant.
ebfd146a 7034
d092494c 7035 Pattern statement needs to be analyzed instead of the original statement
83197f37 7036 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
7037 statements. In basic blocks we are called from some SLP instance
7038 traversal, don't analyze pattern stmts instead, the pattern stmts
7039 already will be part of SLP instance. */
83197f37
IR
7040
7041 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 7042 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 7043 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 7044 {
9d5e7640 7045 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 7046 && pattern_stmt
9d5e7640
IR
7047 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7048 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7049 {
83197f37 7050 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
7051 stmt = pattern_stmt;
7052 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 7053 if (dump_enabled_p ())
9d5e7640 7054 {
78c60e3d
SS
7055 dump_printf_loc (MSG_NOTE, vect_location,
7056 "==> examining pattern statement: ");
7057 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
7058 }
7059 }
7060 else
7061 {
73fbfcad 7062 if (dump_enabled_p ())
e645e942 7063 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 7064
9d5e7640
IR
7065 return true;
7066 }
8644a673 7067 }
83197f37 7068 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 7069 && node == NULL
83197f37
IR
7070 && pattern_stmt
7071 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7072 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7073 {
7074 /* Analyze PATTERN_STMT too. */
73fbfcad 7075 if (dump_enabled_p ())
83197f37 7076 {
78c60e3d
SS
7077 dump_printf_loc (MSG_NOTE, vect_location,
7078 "==> examining pattern statement: ");
7079 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
7080 }
7081
7082 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7083 return false;
7084 }
ebfd146a 7085
1107f3ae 7086 if (is_pattern_stmt_p (stmt_info)
079c527f 7087 && node == NULL
363477c0 7088 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 7089 {
363477c0 7090 gimple_stmt_iterator si;
1107f3ae 7091
363477c0
JJ
7092 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7093 {
7094 gimple pattern_def_stmt = gsi_stmt (si);
7095 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7096 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7097 {
7098 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 7099 if (dump_enabled_p ())
363477c0 7100 {
78c60e3d
SS
7101 dump_printf_loc (MSG_NOTE, vect_location,
7102 "==> examining pattern def statement: ");
7103 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 7104 }
1107f3ae 7105
363477c0
JJ
7106 if (!vect_analyze_stmt (pattern_def_stmt,
7107 need_to_vectorize, node))
7108 return false;
7109 }
7110 }
7111 }
1107f3ae 7112
8644a673
IR
7113 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7114 {
7115 case vect_internal_def:
7116 break;
ebfd146a 7117
8644a673 7118 case vect_reduction_def:
7c5222ff 7119 case vect_nested_cycle:
a70d6342 7120 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 7121 || relevance == vect_used_in_outer_by_reduction
a70d6342 7122 || relevance == vect_unused_in_scope));
8644a673
IR
7123 break;
7124
7125 case vect_induction_def:
7126 case vect_constant_def:
7127 case vect_external_def:
7128 case vect_unknown_def_type:
7129 default:
7130 gcc_unreachable ();
7131 }
ebfd146a 7132
a70d6342
IR
7133 if (bb_vinfo)
7134 {
7135 gcc_assert (PURE_SLP_STMT (stmt_info));
7136
b690cc0f 7137 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 7138 if (dump_enabled_p ())
a70d6342 7139 {
78c60e3d
SS
7140 dump_printf_loc (MSG_NOTE, vect_location,
7141 "get vectype for scalar type: ");
7142 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 7143 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
7144 }
7145
7146 vectype = get_vectype_for_scalar_type (scalar_type);
7147 if (!vectype)
7148 {
73fbfcad 7149 if (dump_enabled_p ())
a70d6342 7150 {
78c60e3d
SS
7151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7152 "not SLPed: unsupported data-type ");
7153 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7154 scalar_type);
e645e942 7155 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
a70d6342
IR
7156 }
7157 return false;
7158 }
7159
73fbfcad 7160 if (dump_enabled_p ())
a70d6342 7161 {
78c60e3d
SS
7162 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7163 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 7164 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
7165 }
7166
7167 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7168 }
7169
8644a673 7170 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 7171 {
8644a673 7172 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
7173 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7174 || (is_gimple_call (stmt)
7175 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 7176 *need_to_vectorize = true;
ebfd146a
IR
7177 }
7178
8644a673 7179 ok = true;
b8698a0f 7180 if (!bb_vinfo
a70d6342
IR
7181 && (STMT_VINFO_RELEVANT_P (stmt_info)
7182 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
0136f8f0
AH
7183 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7184 || vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 7185 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
7186 || vectorizable_operation (stmt, NULL, NULL, NULL)
7187 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7188 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 7189 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 7190 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 7191 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 7192 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
7193 else
7194 {
7195 if (bb_vinfo)
0136f8f0
AH
7196 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7197 || vectorizable_conversion (stmt, NULL, NULL, node)
4a00c761 7198 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 7199 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
7200 || vectorizable_assignment (stmt, NULL, NULL, node)
7201 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 7202 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
7203 || vectorizable_store (stmt, NULL, NULL, node)
7204 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 7205 }
8644a673
IR
7206
7207 if (!ok)
ebfd146a 7208 {
73fbfcad 7209 if (dump_enabled_p ())
8644a673 7210 {
78c60e3d
SS
7211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7212 "not vectorized: relevant stmt not ");
7213 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7214 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 7215 }
b8698a0f 7216
ebfd146a
IR
7217 return false;
7218 }
7219
a70d6342
IR
7220 if (bb_vinfo)
7221 return true;
7222
8644a673
IR
7223 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7224 need extra handling, except for vectorizable reductions. */
7225 if (STMT_VINFO_LIVE_P (stmt_info)
7226 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7227 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 7228
8644a673 7229 if (!ok)
ebfd146a 7230 {
73fbfcad 7231 if (dump_enabled_p ())
8644a673 7232 {
78c60e3d
SS
7233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7234 "not vectorized: live stmt not ");
7235 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7236 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 7237 }
b8698a0f 7238
8644a673 7239 return false;
ebfd146a
IR
7240 }
7241
ebfd146a
IR
7242 return true;
7243}
7244
7245
7246/* Function vect_transform_stmt.
7247
7248 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7249
7250bool
7251vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 7252 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
7253 slp_instance slp_node_instance)
7254{
7255 bool is_store = false;
7256 gimple vec_stmt = NULL;
7257 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 7258 bool done;
ebfd146a
IR
7259
7260 switch (STMT_VINFO_TYPE (stmt_info))
7261 {
7262 case type_demotion_vec_info_type:
ebfd146a 7263 case type_promotion_vec_info_type:
ebfd146a
IR
7264 case type_conversion_vec_info_type:
7265 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7266 gcc_assert (done);
7267 break;
7268
7269 case induc_vec_info_type:
7270 gcc_assert (!slp_node);
7271 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7272 gcc_assert (done);
7273 break;
7274
9dc3f7de
IR
7275 case shift_vec_info_type:
7276 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7277 gcc_assert (done);
7278 break;
7279
ebfd146a
IR
7280 case op_vec_info_type:
7281 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7282 gcc_assert (done);
7283 break;
7284
7285 case assignment_vec_info_type:
7286 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7287 gcc_assert (done);
7288 break;
7289
7290 case load_vec_info_type:
b8698a0f 7291 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
7292 slp_node_instance);
7293 gcc_assert (done);
7294 break;
7295
7296 case store_vec_info_type:
7297 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7298 gcc_assert (done);
0d0293ac 7299 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
7300 {
7301 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 7302 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
7303 one are skipped, and there vec_stmt_info shouldn't be freed
7304 meanwhile. */
0d0293ac 7305 *grouped_store = true;
ebfd146a
IR
7306 if (STMT_VINFO_VEC_STMT (stmt_info))
7307 is_store = true;
7308 }
7309 else
7310 is_store = true;
7311 break;
7312
7313 case condition_vec_info_type:
f7e531cf 7314 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
7315 gcc_assert (done);
7316 break;
7317
7318 case call_vec_info_type:
190c2236 7319 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 7320 stmt = gsi_stmt (*gsi);
5ce9450f
JJ
7321 if (is_gimple_call (stmt)
7322 && gimple_call_internal_p (stmt)
7323 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7324 is_store = true;
ebfd146a
IR
7325 break;
7326
0136f8f0
AH
7327 case call_simd_clone_vec_info_type:
7328 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7329 stmt = gsi_stmt (*gsi);
7330 break;
7331
ebfd146a 7332 case reduc_vec_info_type:
b5aeb3bb 7333 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
7334 gcc_assert (done);
7335 break;
7336
7337 default:
7338 if (!STMT_VINFO_LIVE_P (stmt_info))
7339 {
73fbfcad 7340 if (dump_enabled_p ())
78c60e3d 7341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7342 "stmt not supported.\n");
ebfd146a
IR
7343 gcc_unreachable ();
7344 }
7345 }
7346
7347 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7348 is being vectorized, but outside the immediately enclosing loop. */
7349 if (vec_stmt
a70d6342
IR
7350 && STMT_VINFO_LOOP_VINFO (stmt_info)
7351 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7352 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
7353 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7354 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 7355 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 7356 vect_used_in_outer_by_reduction))
ebfd146a 7357 {
a70d6342
IR
7358 struct loop *innerloop = LOOP_VINFO_LOOP (
7359 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
7360 imm_use_iterator imm_iter;
7361 use_operand_p use_p;
7362 tree scalar_dest;
7363 gimple exit_phi;
7364
73fbfcad 7365 if (dump_enabled_p ())
78c60e3d 7366 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7367 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
7368
7369 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7370 (to be used when vectorizing outer-loop stmts that use the DEF of
7371 STMT). */
7372 if (gimple_code (stmt) == GIMPLE_PHI)
7373 scalar_dest = PHI_RESULT (stmt);
7374 else
7375 scalar_dest = gimple_assign_lhs (stmt);
7376
7377 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7378 {
7379 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7380 {
7381 exit_phi = USE_STMT (use_p);
7382 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7383 }
7384 }
7385 }
7386
7387 /* Handle stmts whose DEF is used outside the loop-nest that is
7388 being vectorized. */
7389 if (STMT_VINFO_LIVE_P (stmt_info)
7390 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7391 {
7392 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7393 gcc_assert (done);
7394 }
7395
7396 if (vec_stmt)
83197f37 7397 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 7398
b8698a0f 7399 return is_store;
ebfd146a
IR
7400}
7401
7402
b8698a0f 7403/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
7404 stmt_vec_info. */
7405
7406void
7407vect_remove_stores (gimple first_stmt)
7408{
7409 gimple next = first_stmt;
7410 gimple tmp;
7411 gimple_stmt_iterator next_si;
7412
7413 while (next)
7414 {
78048b1c
JJ
7415 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7416
7417 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7418 if (is_pattern_stmt_p (stmt_info))
7419 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
7420 /* Free the attached stmt_vec_info and remove the stmt. */
7421 next_si = gsi_for_stmt (next);
3d3f2249 7422 unlink_stmt_vdef (next);
ebfd146a 7423 gsi_remove (&next_si, true);
3d3f2249 7424 release_defs (next);
ebfd146a
IR
7425 free_stmt_vec_info (next);
7426 next = tmp;
7427 }
7428}
7429
7430
7431/* Function new_stmt_vec_info.
7432
7433 Create and initialize a new stmt_vec_info struct for STMT. */
7434
7435stmt_vec_info
b8698a0f 7436new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 7437 bb_vec_info bb_vinfo)
ebfd146a
IR
7438{
7439 stmt_vec_info res;
7440 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7441
7442 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7443 STMT_VINFO_STMT (res) = stmt;
7444 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 7445 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 7446 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
7447 STMT_VINFO_LIVE_P (res) = false;
7448 STMT_VINFO_VECTYPE (res) = NULL;
7449 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 7450 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
7451 STMT_VINFO_IN_PATTERN_P (res) = false;
7452 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 7453 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
7454 STMT_VINFO_DATA_REF (res) = NULL;
7455
7456 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7457 STMT_VINFO_DR_OFFSET (res) = NULL;
7458 STMT_VINFO_DR_INIT (res) = NULL;
7459 STMT_VINFO_DR_STEP (res) = NULL;
7460 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7461
7462 if (gimple_code (stmt) == GIMPLE_PHI
7463 && is_loop_header_bb_p (gimple_bb (stmt)))
7464 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7465 else
8644a673
IR
7466 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7467
9771b263 7468 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 7469 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
7470 GROUP_FIRST_ELEMENT (res) = NULL;
7471 GROUP_NEXT_ELEMENT (res) = NULL;
7472 GROUP_SIZE (res) = 0;
7473 GROUP_STORE_COUNT (res) = 0;
7474 GROUP_GAP (res) = 0;
7475 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
7476
7477 return res;
7478}
7479
7480
7481/* Create a hash table for stmt_vec_info. */
7482
7483void
7484init_stmt_vec_info_vec (void)
7485{
9771b263
DN
7486 gcc_assert (!stmt_vec_info_vec.exists ());
7487 stmt_vec_info_vec.create (50);
ebfd146a
IR
7488}
7489
7490
7491/* Free hash table for stmt_vec_info. */
7492
7493void
7494free_stmt_vec_info_vec (void)
7495{
93675444
JJ
7496 unsigned int i;
7497 vec_void_p info;
7498 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7499 if (info != NULL)
7500 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
9771b263
DN
7501 gcc_assert (stmt_vec_info_vec.exists ());
7502 stmt_vec_info_vec.release ();
ebfd146a
IR
7503}
7504
7505
7506/* Free stmt vectorization related info. */
7507
7508void
7509free_stmt_vec_info (gimple stmt)
7510{
7511 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7512
7513 if (!stmt_info)
7514 return;
7515
78048b1c
JJ
7516 /* Check if this statement has a related "pattern stmt"
7517 (introduced by the vectorizer during the pattern recognition
7518 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7519 too. */
7520 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7521 {
7522 stmt_vec_info patt_info
7523 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7524 if (patt_info)
7525 {
363477c0 7526 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
f0281fde
RB
7527 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7528 gimple_set_bb (patt_stmt, NULL);
7529 tree lhs = gimple_get_lhs (patt_stmt);
7530 if (TREE_CODE (lhs) == SSA_NAME)
7531 release_ssa_name (lhs);
363477c0
JJ
7532 if (seq)
7533 {
7534 gimple_stmt_iterator si;
7535 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde
RB
7536 {
7537 gimple seq_stmt = gsi_stmt (si);
7538 gimple_set_bb (seq_stmt, NULL);
7539 lhs = gimple_get_lhs (patt_stmt);
7540 if (TREE_CODE (lhs) == SSA_NAME)
7541 release_ssa_name (lhs);
7542 free_stmt_vec_info (seq_stmt);
7543 }
363477c0 7544 }
f0281fde 7545 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
7546 }
7547 }
7548
9771b263 7549 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 7550 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
7551 set_vinfo_for_stmt (stmt, NULL);
7552 free (stmt_info);
7553}
7554
7555
bb67d9c7 7556/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 7557
bb67d9c7 7558 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
7559 by the target. */
7560
bb67d9c7
RG
7561static tree
7562get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a 7563{
ef4bddc2
RS
7564 machine_mode inner_mode = TYPE_MODE (scalar_type);
7565 machine_mode simd_mode;
2f816591 7566 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
7567 int nunits;
7568 tree vectype;
7569
cc4b5170 7570 if (nbytes == 0)
ebfd146a
IR
7571 return NULL_TREE;
7572
48f2e373
RB
7573 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7574 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7575 return NULL_TREE;
7576
7b7b1813
RG
7577 /* For vector types of elements whose mode precision doesn't
7578 match their types precision we use a element type of mode
7579 precision. The vectorization routines will have to make sure
48f2e373
RB
7580 they support the proper result truncation/extension.
7581 We also make sure to build vector types with INTEGER_TYPE
7582 component type only. */
6d7971b8 7583 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
7584 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7585 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
7586 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7587 TYPE_UNSIGNED (scalar_type));
6d7971b8 7588
ccbf5bb4
RG
7589 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7590 When the component mode passes the above test simply use a type
7591 corresponding to that mode. The theory is that any use that
7592 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 7593 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 7594 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
7595 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7596
7597 /* We can't build a vector type of elements with alignment bigger than
7598 their size. */
dfc2e2ac 7599 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
7600 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7601 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 7602
dfc2e2ac
RB
7603 /* If we felt back to using the mode fail if there was
7604 no scalar type for it. */
7605 if (scalar_type == NULL_TREE)
7606 return NULL_TREE;
7607
bb67d9c7
RG
7608 /* If no size was supplied use the mode the target prefers. Otherwise
7609 lookup a vector mode of the specified size. */
7610 if (size == 0)
7611 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7612 else
7613 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
7614 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7615 if (nunits <= 1)
7616 return NULL_TREE;
ebfd146a
IR
7617
7618 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
7619
7620 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7621 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 7622 return NULL_TREE;
ebfd146a
IR
7623
7624 return vectype;
7625}
7626
bb67d9c7
RG
7627unsigned int current_vector_size;
7628
7629/* Function get_vectype_for_scalar_type.
7630
7631 Returns the vector type corresponding to SCALAR_TYPE as supported
7632 by the target. */
7633
7634tree
7635get_vectype_for_scalar_type (tree scalar_type)
7636{
7637 tree vectype;
7638 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7639 current_vector_size);
7640 if (vectype
7641 && current_vector_size == 0)
7642 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7643 return vectype;
7644}
7645
b690cc0f
RG
7646/* Function get_same_sized_vectype
7647
7648 Returns a vector type corresponding to SCALAR_TYPE of size
7649 VECTOR_TYPE if supported by the target. */
7650
7651tree
bb67d9c7 7652get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 7653{
bb67d9c7
RG
7654 return get_vectype_for_scalar_type_and_size
7655 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
7656}
7657
ebfd146a
IR
7658/* Function vect_is_simple_use.
7659
7660 Input:
a70d6342
IR
7661 LOOP_VINFO - the vect info of the loop that is being vectorized.
7662 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 7663 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
7664 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7665
7666 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 7667 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 7668 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 7669 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
7670 is the case in reduction/induction computations).
7671 For basic blocks, supportable operands are constants and bb invariants.
7672 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
7673
7674bool
24ee1384 7675vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 7676 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 7677 tree *def, enum vect_def_type *dt)
b8698a0f 7678{
ebfd146a
IR
7679 basic_block bb;
7680 stmt_vec_info stmt_vinfo;
a70d6342 7681 struct loop *loop = NULL;
b8698a0f 7682
a70d6342
IR
7683 if (loop_vinfo)
7684 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
7685
7686 *def_stmt = NULL;
7687 *def = NULL_TREE;
b8698a0f 7688
73fbfcad 7689 if (dump_enabled_p ())
ebfd146a 7690 {
78c60e3d
SS
7691 dump_printf_loc (MSG_NOTE, vect_location,
7692 "vect_is_simple_use: operand ");
7693 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 7694 dump_printf (MSG_NOTE, "\n");
ebfd146a 7695 }
b8698a0f 7696
b758f602 7697 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
7698 {
7699 *dt = vect_constant_def;
7700 return true;
7701 }
b8698a0f 7702
ebfd146a
IR
7703 if (is_gimple_min_invariant (operand))
7704 {
7705 *def = operand;
8644a673 7706 *dt = vect_external_def;
ebfd146a
IR
7707 return true;
7708 }
7709
7710 if (TREE_CODE (operand) == PAREN_EXPR)
7711 {
73fbfcad 7712 if (dump_enabled_p ())
e645e942 7713 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
ebfd146a
IR
7714 operand = TREE_OPERAND (operand, 0);
7715 }
b8698a0f 7716
ebfd146a
IR
7717 if (TREE_CODE (operand) != SSA_NAME)
7718 {
73fbfcad 7719 if (dump_enabled_p ())
78c60e3d 7720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7721 "not ssa-name.\n");
ebfd146a
IR
7722 return false;
7723 }
b8698a0f 7724
ebfd146a
IR
7725 *def_stmt = SSA_NAME_DEF_STMT (operand);
7726 if (*def_stmt == NULL)
7727 {
73fbfcad 7728 if (dump_enabled_p ())
78c60e3d 7729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7730 "no def_stmt.\n");
ebfd146a
IR
7731 return false;
7732 }
7733
73fbfcad 7734 if (dump_enabled_p ())
ebfd146a 7735 {
78c60e3d
SS
7736 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7737 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
7738 }
7739
8644a673 7740 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
7741 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7742 if (gimple_nop_p (*def_stmt))
7743 {
7744 *def = operand;
8644a673 7745 *dt = vect_external_def;
ebfd146a
IR
7746 return true;
7747 }
7748
7749 bb = gimple_bb (*def_stmt);
a70d6342
IR
7750
7751 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7752 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 7753 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 7754 *dt = vect_external_def;
ebfd146a
IR
7755 else
7756 {
7757 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7758 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7759 }
7760
24ee1384
IR
7761 if (*dt == vect_unknown_def_type
7762 || (stmt
7763 && *dt == vect_double_reduction_def
7764 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 7765 {
73fbfcad 7766 if (dump_enabled_p ())
78c60e3d 7767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7768 "Unsupported pattern.\n");
ebfd146a
IR
7769 return false;
7770 }
7771
73fbfcad 7772 if (dump_enabled_p ())
e645e942 7773 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
ebfd146a
IR
7774
7775 switch (gimple_code (*def_stmt))
7776 {
7777 case GIMPLE_PHI:
7778 *def = gimple_phi_result (*def_stmt);
7779 break;
7780
7781 case GIMPLE_ASSIGN:
7782 *def = gimple_assign_lhs (*def_stmt);
7783 break;
7784
7785 case GIMPLE_CALL:
7786 *def = gimple_call_lhs (*def_stmt);
7787 if (*def != NULL)
7788 break;
7789 /* FALLTHRU */
7790 default:
73fbfcad 7791 if (dump_enabled_p ())
78c60e3d 7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7793 "unsupported defining stmt:\n");
ebfd146a
IR
7794 return false;
7795 }
7796
7797 return true;
7798}
7799
b690cc0f
RG
7800/* Function vect_is_simple_use_1.
7801
7802 Same as vect_is_simple_use_1 but also determines the vector operand
7803 type of OPERAND and stores it to *VECTYPE. If the definition of
7804 OPERAND is vect_uninitialized_def, vect_constant_def or
7805 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7806 is responsible to compute the best suited vector type for the
7807 scalar operand. */
7808
7809bool
24ee1384 7810vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
7811 bb_vec_info bb_vinfo, gimple *def_stmt,
7812 tree *def, enum vect_def_type *dt, tree *vectype)
7813{
24ee1384
IR
7814 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7815 def, dt))
b690cc0f
RG
7816 return false;
7817
7818 /* Now get a vector type if the def is internal, otherwise supply
7819 NULL_TREE and leave it up to the caller to figure out a proper
7820 type for the use stmt. */
7821 if (*dt == vect_internal_def
7822 || *dt == vect_induction_def
7823 || *dt == vect_reduction_def
7824 || *dt == vect_double_reduction_def
7825 || *dt == vect_nested_cycle)
7826 {
7827 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
7828
7829 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7830 && !STMT_VINFO_RELEVANT (stmt_info)
7831 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 7832 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 7833
b690cc0f
RG
7834 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7835 gcc_assert (*vectype != NULL_TREE);
7836 }
7837 else if (*dt == vect_uninitialized_def
7838 || *dt == vect_constant_def
7839 || *dt == vect_external_def)
7840 *vectype = NULL_TREE;
7841 else
7842 gcc_unreachable ();
7843
7844 return true;
7845}
7846
ebfd146a
IR
7847
7848/* Function supportable_widening_operation
7849
b8698a0f
L
7850 Check whether an operation represented by the code CODE is a
7851 widening operation that is supported by the target platform in
b690cc0f
RG
7852 vector form (i.e., when operating on arguments of type VECTYPE_IN
7853 producing a result of type VECTYPE_OUT).
b8698a0f 7854
ebfd146a
IR
7855 Widening operations we currently support are NOP (CONVERT), FLOAT
7856 and WIDEN_MULT. This function checks if these operations are supported
7857 by the target platform either directly (via vector tree-codes), or via
7858 target builtins.
7859
7860 Output:
b8698a0f
L
7861 - CODE1 and CODE2 are codes of vector operations to be used when
7862 vectorizing the operation, if available.
ebfd146a
IR
7863 - MULTI_STEP_CVT determines the number of required intermediate steps in
7864 case of multi-step conversion (like char->short->int - in that case
7865 MULTI_STEP_CVT will be 1).
b8698a0f
L
7866 - INTERM_TYPES contains the intermediate type required to perform the
7867 widening operation (short in the above example). */
ebfd146a
IR
7868
7869bool
b690cc0f
RG
7870supportable_widening_operation (enum tree_code code, gimple stmt,
7871 tree vectype_out, tree vectype_in,
ebfd146a
IR
7872 enum tree_code *code1, enum tree_code *code2,
7873 int *multi_step_cvt,
9771b263 7874 vec<tree> *interm_types)
ebfd146a
IR
7875{
7876 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7877 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 7878 struct loop *vect_loop = NULL;
ef4bddc2 7879 machine_mode vec_mode;
81f40b79 7880 enum insn_code icode1, icode2;
ebfd146a 7881 optab optab1, optab2;
b690cc0f
RG
7882 tree vectype = vectype_in;
7883 tree wide_vectype = vectype_out;
ebfd146a 7884 enum tree_code c1, c2;
4a00c761
JJ
7885 int i;
7886 tree prev_type, intermediate_type;
ef4bddc2 7887 machine_mode intermediate_mode, prev_mode;
4a00c761 7888 optab optab3, optab4;
ebfd146a 7889
4a00c761 7890 *multi_step_cvt = 0;
4ef69dfc
IR
7891 if (loop_info)
7892 vect_loop = LOOP_VINFO_LOOP (loop_info);
7893
ebfd146a
IR
7894 switch (code)
7895 {
7896 case WIDEN_MULT_EXPR:
6ae6116f
RH
7897 /* The result of a vectorized widening operation usually requires
7898 two vectors (because the widened results do not fit into one vector).
7899 The generated vector results would normally be expected to be
7900 generated in the same order as in the original scalar computation,
7901 i.e. if 8 results are generated in each vector iteration, they are
7902 to be organized as follows:
7903 vect1: [res1,res2,res3,res4],
7904 vect2: [res5,res6,res7,res8].
7905
7906 However, in the special case that the result of the widening
7907 operation is used in a reduction computation only, the order doesn't
7908 matter (because when vectorizing a reduction we change the order of
7909 the computation). Some targets can take advantage of this and
7910 generate more efficient code. For example, targets like Altivec,
7911 that support widen_mult using a sequence of {mult_even,mult_odd}
7912 generate the following vectors:
7913 vect1: [res1,res3,res5,res7],
7914 vect2: [res2,res4,res6,res8].
7915
7916 When vectorizing outer-loops, we execute the inner-loop sequentially
7917 (each vectorized inner-loop iteration contributes to VF outer-loop
7918 iterations in parallel). We therefore don't allow to change the
7919 order of the computation in the inner-loop during outer-loop
7920 vectorization. */
7921 /* TODO: Another case in which order doesn't *really* matter is when we
7922 widen and then contract again, e.g. (short)((int)x * y >> 8).
7923 Normally, pack_trunc performs an even/odd permute, whereas the
7924 repack from an even/odd expansion would be an interleave, which
7925 would be significantly simpler for e.g. AVX2. */
7926 /* In any case, in order to avoid duplicating the code below, recurse
7927 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7928 are properly set up for the caller. If we fail, we'll continue with
7929 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7930 if (vect_loop
7931 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7932 && !nested_in_vect_loop_p (vect_loop, stmt)
7933 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7934 stmt, vectype_out, vectype_in,
a86ec597
RH
7935 code1, code2, multi_step_cvt,
7936 interm_types))
ebc047a2
CH
7937 {
7938 /* Elements in a vector with vect_used_by_reduction property cannot
7939 be reordered if the use chain with this property does not have the
7940 same operation. One such an example is s += a * b, where elements
7941 in a and b cannot be reordered. Here we check if the vector defined
7942 by STMT is only directly used in the reduction statement. */
7943 tree lhs = gimple_assign_lhs (stmt);
7944 use_operand_p dummy;
7945 gimple use_stmt;
7946 stmt_vec_info use_stmt_info = NULL;
7947 if (single_imm_use (lhs, &dummy, &use_stmt)
7948 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7949 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7950 return true;
7951 }
4a00c761
JJ
7952 c1 = VEC_WIDEN_MULT_LO_EXPR;
7953 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
7954 break;
7955
6ae6116f
RH
7956 case VEC_WIDEN_MULT_EVEN_EXPR:
7957 /* Support the recursion induced just above. */
7958 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7959 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7960 break;
7961
36ba4aae 7962 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
7963 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7964 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
7965 break;
7966
ebfd146a 7967 CASE_CONVERT:
4a00c761
JJ
7968 c1 = VEC_UNPACK_LO_EXPR;
7969 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
7970 break;
7971
7972 case FLOAT_EXPR:
4a00c761
JJ
7973 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7974 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
7975 break;
7976
7977 case FIX_TRUNC_EXPR:
7978 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7979 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7980 computing the operation. */
7981 return false;
7982
7983 default:
7984 gcc_unreachable ();
7985 }
7986
6ae6116f 7987 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
4a00c761
JJ
7988 {
7989 enum tree_code ctmp = c1;
7990 c1 = c2;
7991 c2 = ctmp;
7992 }
7993
ebfd146a
IR
7994 if (code == FIX_TRUNC_EXPR)
7995 {
7996 /* The signedness is determined from output operand. */
b690cc0f
RG
7997 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7998 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
7999 }
8000 else
8001 {
8002 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8003 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8004 }
8005
8006 if (!optab1 || !optab2)
8007 return false;
8008
8009 vec_mode = TYPE_MODE (vectype);
947131ba
RS
8010 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8011 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
8012 return false;
8013
4a00c761
JJ
8014 *code1 = c1;
8015 *code2 = c2;
8016
8017 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8018 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8019 return true;
8020
b8698a0f 8021 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 8022 types. */
ebfd146a 8023
4a00c761
JJ
8024 prev_type = vectype;
8025 prev_mode = vec_mode;
b8698a0f 8026
4a00c761
JJ
8027 if (!CONVERT_EXPR_CODE_P (code))
8028 return false;
b8698a0f 8029
4a00c761
JJ
8030 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8031 intermediate steps in promotion sequence. We try
8032 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8033 not. */
9771b263 8034 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
8035 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8036 {
8037 intermediate_mode = insn_data[icode1].operand[0].mode;
8038 intermediate_type
8039 = lang_hooks.types.type_for_mode (intermediate_mode,
8040 TYPE_UNSIGNED (prev_type));
8041 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8042 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8043
8044 if (!optab3 || !optab4
8045 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8046 || insn_data[icode1].operand[0].mode != intermediate_mode
8047 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8048 || insn_data[icode2].operand[0].mode != intermediate_mode
8049 || ((icode1 = optab_handler (optab3, intermediate_mode))
8050 == CODE_FOR_nothing)
8051 || ((icode2 = optab_handler (optab4, intermediate_mode))
8052 == CODE_FOR_nothing))
8053 break;
ebfd146a 8054
9771b263 8055 interm_types->quick_push (intermediate_type);
4a00c761
JJ
8056 (*multi_step_cvt)++;
8057
8058 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8059 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8060 return true;
8061
8062 prev_type = intermediate_type;
8063 prev_mode = intermediate_mode;
ebfd146a
IR
8064 }
8065
9771b263 8066 interm_types->release ();
4a00c761 8067 return false;
ebfd146a
IR
8068}
8069
8070
8071/* Function supportable_narrowing_operation
8072
b8698a0f
L
8073 Check whether an operation represented by the code CODE is a
8074 narrowing operation that is supported by the target platform in
b690cc0f
RG
8075 vector form (i.e., when operating on arguments of type VECTYPE_IN
8076 and producing a result of type VECTYPE_OUT).
b8698a0f 8077
ebfd146a 8078 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 8079 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
8080 the target platform directly via vector tree-codes.
8081
8082 Output:
b8698a0f
L
8083 - CODE1 is the code of a vector operation to be used when
8084 vectorizing the operation, if available.
ebfd146a
IR
8085 - MULTI_STEP_CVT determines the number of required intermediate steps in
8086 case of multi-step conversion (like int->short->char - in that case
8087 MULTI_STEP_CVT will be 1).
8088 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 8089 narrowing operation (short in the above example). */
ebfd146a
IR
8090
8091bool
8092supportable_narrowing_operation (enum tree_code code,
b690cc0f 8093 tree vectype_out, tree vectype_in,
ebfd146a 8094 enum tree_code *code1, int *multi_step_cvt,
9771b263 8095 vec<tree> *interm_types)
ebfd146a 8096{
ef4bddc2 8097 machine_mode vec_mode;
ebfd146a
IR
8098 enum insn_code icode1;
8099 optab optab1, interm_optab;
b690cc0f
RG
8100 tree vectype = vectype_in;
8101 tree narrow_vectype = vectype_out;
ebfd146a 8102 enum tree_code c1;
4a00c761 8103 tree intermediate_type;
ef4bddc2 8104 machine_mode intermediate_mode, prev_mode;
ebfd146a 8105 int i;
4a00c761 8106 bool uns;
ebfd146a 8107
4a00c761 8108 *multi_step_cvt = 0;
ebfd146a
IR
8109 switch (code)
8110 {
8111 CASE_CONVERT:
8112 c1 = VEC_PACK_TRUNC_EXPR;
8113 break;
8114
8115 case FIX_TRUNC_EXPR:
8116 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8117 break;
8118
8119 case FLOAT_EXPR:
8120 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8121 tree code and optabs used for computing the operation. */
8122 return false;
8123
8124 default:
8125 gcc_unreachable ();
8126 }
8127
8128 if (code == FIX_TRUNC_EXPR)
8129 /* The signedness is determined from output operand. */
b690cc0f 8130 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
8131 else
8132 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8133
8134 if (!optab1)
8135 return false;
8136
8137 vec_mode = TYPE_MODE (vectype);
947131ba 8138 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
8139 return false;
8140
4a00c761
JJ
8141 *code1 = c1;
8142
8143 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8144 return true;
8145
ebfd146a
IR
8146 /* Check if it's a multi-step conversion that can be done using intermediate
8147 types. */
4a00c761
JJ
8148 prev_mode = vec_mode;
8149 if (code == FIX_TRUNC_EXPR)
8150 uns = TYPE_UNSIGNED (vectype_out);
8151 else
8152 uns = TYPE_UNSIGNED (vectype);
8153
8154 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8155 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8156 costly than signed. */
8157 if (code == FIX_TRUNC_EXPR && uns)
8158 {
8159 enum insn_code icode2;
8160
8161 intermediate_type
8162 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8163 interm_optab
8164 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 8165 if (interm_optab != unknown_optab
4a00c761
JJ
8166 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8167 && insn_data[icode1].operand[0].mode
8168 == insn_data[icode2].operand[0].mode)
8169 {
8170 uns = false;
8171 optab1 = interm_optab;
8172 icode1 = icode2;
8173 }
8174 }
ebfd146a 8175
4a00c761
JJ
8176 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8177 intermediate steps in promotion sequence. We try
8178 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 8179 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
8180 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8181 {
8182 intermediate_mode = insn_data[icode1].operand[0].mode;
8183 intermediate_type
8184 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8185 interm_optab
8186 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8187 optab_default);
8188 if (!interm_optab
8189 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8190 || insn_data[icode1].operand[0].mode != intermediate_mode
8191 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8192 == CODE_FOR_nothing))
8193 break;
8194
9771b263 8195 interm_types->quick_push (intermediate_type);
4a00c761
JJ
8196 (*multi_step_cvt)++;
8197
8198 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8199 return true;
8200
8201 prev_mode = intermediate_mode;
8202 optab1 = interm_optab;
ebfd146a
IR
8203 }
8204
9771b263 8205 interm_types->release ();
4a00c761 8206 return false;
ebfd146a 8207}