/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
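
/* For example (an illustrative sketch, not a quote of any particular
   caller): during analysis a caller can accumulate the cost of two
   vector statements for later processing

     stmt_vector_for_cost body_cost_vec = vNULL;
     unsigned estimate
       = record_stmt_cost (&body_cost_vec, 2, vector_stmt,
			   stmt_info, 0, vect_body);

   whereas passing a NULL cost vector hands the cost straight to the
   target's cost model through add_stmt_cost instead.  */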

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
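
/* E.g. for a V4SI element type and NELEMS == 2 this yields the
   equivalent of "vector(4) int vect_array[2];" (an illustrative
   sketch; the actual temporary name carries a numeric suffix).  */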

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
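
/* A sketch of the GIMPLE this emits for N == 2, assuming SCALAR_DEST
   is named "x" (all names below are illustrative only):

     vect_x.7_23 = vect_array[2];

   The fresh SSA name is returned so the caller can use the vector.  */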

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
	  if (TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
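
/* For instance, in

     s_8 = ...;         <-- s_8 is used in an exit phi: *live_p
     a[i_4] = t_6;      <-- has a vdef: *relevant

   (an illustrative sketch) the store is relevant because it changes
   memory, and s_8 is live because its value is needed after the
   loop.  */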


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
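
/* Illustrative examples: for the store "a[i_1] = x_2" this returns
   true for the use x_2 (the stored value) but false for i_1, which
   only indexes the array; for the load "x_2 = a[i_1]" it returns
   false for every use, since all uses only index the array.  */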


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
	            break;
	          /* fall through */

	        default:
	          if (dump_enabled_p ())
	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
	                             "unsupported use of reduction.\n");
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
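
/* A worked instance of the loop above (illustrative): a two-step
   promotion (PWR == 1) of a promotion stmt costs
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   operations, while the corresponding two-step demotion (TMP == I)
   costs vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3.  */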

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */

      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
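
/* For instance (a worked example of the interleaving formula above):
   a grouped store with GROUP_SIZE == 4 that cannot use store-lanes
   needs ncopies * log2 (4) * 4 = 8 * ncopies vec_perm operations on
   top of the stores themselves.  */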


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
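
/* As a worked example (illustrative): a strided load of a four-element
   vector type with NCOPIES == 2 is costed above as 2 * 4 = 8
   scalar_load operations plus 2 vec_construct operations to assemble
   the loaded elements into vectors.  */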


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
          }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

	  pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
	}
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
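
/* A sketch of what vect_init_vector generates for a loop-invariant
   scalar VAL and a four-element vector TYPE (names illustrative):

     vect_cst_.8_17 = {val_5, val_5, val_5, val_5};

   possibly preceded by a NOP_EXPR conversion of val_5 when its type
   does not match TYPE's element type, and placed at GSI or in the
   loop preheader when GSI is NULL.  */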


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def )
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
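
/* For illustration only: the target hook consulted above maps a scalar
   builtin plus the vector types in use to a vector builtin.  A minimal
   hypothetical hook could look like the sketch below (real implementations
   live in the backends, e.g. ix86_builtin_vectorized_function); the decl
   returned here is an assumed placeholder, not a real target decl.  */
#if 0
static tree
example_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
{
  /* A target might advertise a 4-lane single-precision sqrt.  */
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SQRTF
      && TYPE_VECTOR_SUBPARTS (type_out) == 4
      && TYPE_VECTOR_SUBPARTS (type_in) == 4)
    return example_v4sf_sqrt_decl;  /* hypothetical target builtin decl  */
  return NULL_TREE;
}
#endif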

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
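  /* Worked example: with a 128-bit vector unit, a call taking ints and
     returning shorts has nunits_in == 4 and nunits_out == 8, so
     nunits_in == nunits_out / 2 and the call is a NARROW (packing) case;
     the reverse (shorts in, ints out) is WIDEN, and equal lane counts
     are NONE.  */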

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
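  /* E.g. with VF == 8 and 4-lane input vectors, ncopies == 8 / 4 == 2
     vectorized calls are emitted per scalar call; in the NARROW case the
     larger output lane count divides VF instead, so the packing calls are
     not over-counted.  */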

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_call ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest, NULL);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
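          /* The IFN_GOMP_SIMD_LANE branch above materializes the lane-id
             vector directly; e.g. with nunits_out == 4, copy j == 1 is
             assigned the constant vector { 4, 5, 6, 7 }.  */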
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code is CODE, whose result variable is
   VEC_DEST, and whose arguments are VEC_OPRND0 and VEC_OPRND1 (OP_TYPE
   says whether the operation is unary or binary).  The new vector stmt is
   to be inserted at BSI.  In the case that CODE is a CALL_EXPR, this means
   that a call to DECL needs to be created (DECL is a function-decl of a
   target-builtin).  STMT is the original scalar stmt that we are
   vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
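
/* Widening intuition, as a self-contained sketch (plain C, not GCC
   internals; lane ordering details of real targets are ignored): one
   vector of N narrow elements widens into two vectors of N/2 wide
   elements, a "lo" and a "hi" half, so the vectorizer emits two half
   stmts per input vector.  */
#if 0
static void
widen_halves (const signed char in[8], short lo[4], short hi[4])
{
  int i;
  for (i = 0; i < 4; i++)
    {
      lo[i] = in[i];      /* first half of the lanes  */
      hi[i] = in[i + 4];  /* second half of the lanes  */
    }
}
#endif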

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
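
/* Each invocation pushes two vector defs and then recurses MULTI_STEP_CVT
   more times, so a top-level call with multi_step_cvt == 1 collects
   2 * (1 + 1) == 4 defs - exactly what one two-step narrowing sequence
   over these operands consumes.  */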

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
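
/* Demotion intuition, as a self-contained sketch (plain C, not GCC
   internals; target lane-ordering details are ignored): two vectors of N
   wide elements pack into one vector of 2N narrow elements, which is why
   the loop above consumes operands in pairs; a multi-step int -> char
   demotion first packs int pairs to short, then short pairs to char.  */
#if 0
static void
pack_pair (const int in0[4], const int in1[4], short out[8])
{
  int i;
  for (i = 0; i < 4; i++)
    {
      out[i] = (short) in0[i];      /* lanes from the first operand  */
      out[i + 4] = (short) in1[i];  /* lanes from the second operand  */
    }
}
#endif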

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

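  /* Worked example for the multi-step WIDEN path above: on a target with
     no direct short -> float conversion, the short operands are first
     widened with a NOP_EXPR to an intermediate integer mode found by the
     GET_MODE_2XWIDER_MODE walk (here int), and only the final int -> float
     step uses the FLOAT_EXPR conversion; interm_types records the
     intermediate vector types for the transform phase below.  */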
  if (!vec_stmt)                /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

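  /* E.g. a lane-count- and size-preserving int -> unsigned conversion on a
     4-lane int vector passes the check above and is vectorized below as a
     plain copy through a VIEW_CONVERT_EXPR.  */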
  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{

  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
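
/* Usage sketch: other parts of the vectorizer (e.g. the pattern
   recognizer) can query this predicate before committing to a pattern
   that introduces a shift; OPRND0 below is an assumed local variable.  */
#if 0
  if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
    {
      /* ... build the pattern stmt that shifts OPRND0 ... */
    }
#endif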


/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
3220
3221 /* Determine whether the shift amount is a vector, or scalar. If the
3222 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3223
49eab32e
JJ
3224 if (dt[1] == vect_internal_def && !slp_node)
3225 scalar_shift_arg = false;
3226 else if (dt[1] == vect_constant_def
3227 || dt[1] == vect_external_def
3228 || dt[1] == vect_internal_def)
3229 {
3230 /* In SLP, we need to check whether the shift count is the same in
3231 all the stmts of the node; in loops, if the count is a constant
3232 or invariant, it is always a scalar shift. */
3233 if (slp_node)
3234 {
9771b263 3235 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
3236 gimple slpstmt;
3237
9771b263 3238 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
3239 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3240 scalar_shift_arg = false;
3241 }
3242 }
3243 else
3244 {
73fbfcad 3245 if (dump_enabled_p ())
78c60e3d 3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3247 "operand mode requires invariant argument.\n");
49eab32e
JJ
3248 return false;
3249 }
3250
9dc3f7de 3251 /* Vector shifted by vector. */
49eab32e 3252 if (!scalar_shift_arg)
9dc3f7de
IR
3253 {
3254 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 3255 if (dump_enabled_p ())
78c60e3d 3256 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3257 "vector/vector shift/rotate found.\n");
78c60e3d 3258
aa948027
JJ
3259 if (!op1_vectype)
3260 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3261 if (op1_vectype == NULL_TREE
3262 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 3263 {
73fbfcad 3264 if (dump_enabled_p ())
78c60e3d
SS
3265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3266 "unusable type for last operand in"
e645e942 3267 " vector/vector shift/rotate.\n");
cede2577
JJ
3268 return false;
3269 }
9dc3f7de
IR
3270 }
3271 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
3272 whether it has a vector-shifted-by-vector insn. */
49eab32e 3273 else
9dc3f7de
IR
3274 {
3275 optab = optab_for_tree_code (code, vectype, optab_scalar);
3276 if (optab
3277 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3278 {
73fbfcad 3279 if (dump_enabled_p ())
78c60e3d 3280 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3281 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
3282 }
3283 else
3284 {
3285 optab = optab_for_tree_code (code, vectype, optab_vector);
3286 if (optab
3287 && (optab_handler (optab, TYPE_MODE (vectype))
3288 != CODE_FOR_nothing))
3289 {
49eab32e
JJ
3290 scalar_shift_arg = false;
3291
73fbfcad 3292 if (dump_enabled_p ())
78c60e3d 3293 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3294 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
3295
3296 /* Unlike the other binary operators, shifts/rotates have an int
3297 rhs rather than one of the same type as the lhs,
3298 so make sure the scalar is the right type if we are
aa948027 3299 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
3300 if (dt[1] == vect_constant_def)
3301 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
3302 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3303 TREE_TYPE (op1)))
3304 {
3305 if (slp_node
3306 && TYPE_MODE (TREE_TYPE (vectype))
3307 != TYPE_MODE (TREE_TYPE (op1)))
3308 {
73fbfcad 3309 if (dump_enabled_p ())
78c60e3d
SS
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "unusable type for last operand in"
e645e942 3312 " vector/vector shift/rotate.\n");
aa948027
JJ
3313 return false;
3314 }
3315 if (vec_stmt && !slp_node)
3316 {
3317 op1 = fold_convert (TREE_TYPE (vectype), op1);
3318 op1 = vect_init_vector (stmt, op1,
3319 TREE_TYPE (vectype), NULL);
3320 }
3321 }
9dc3f7de
IR
3322 }
3323 }
3324 }
9dc3f7de
IR
3325
3326 /* Supportable by target? */
3327 if (!optab)
3328 {
73fbfcad 3329 if (dump_enabled_p ())
78c60e3d 3330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3331 "no optab.\n");
9dc3f7de
IR
3332 return false;
3333 }
3334 vec_mode = TYPE_MODE (vectype);
3335 icode = (int) optab_handler (optab, vec_mode);
3336 if (icode == CODE_FOR_nothing)
3337 {
73fbfcad 3338 if (dump_enabled_p ())
78c60e3d 3339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3340 "op not supported by target.\n");
9dc3f7de
IR
3341 /* Check only during analysis. */
3342 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3343 || (vf < vect_min_worthwhile_factor (code)
3344 && !vec_stmt))
3345 return false;
73fbfcad 3346 if (dump_enabled_p ())
e645e942
TJ
3347 dump_printf_loc (MSG_NOTE, vect_location,
3348 "proceeding using word mode.\n");
9dc3f7de
IR
3349 }
3350
3351 /* Worthwhile without SIMD support? Check only during analysis. */
3352 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3353 && vf < vect_min_worthwhile_factor (code)
3354 && !vec_stmt)
3355 {
73fbfcad 3356 if (dump_enabled_p ())
78c60e3d 3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3358 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
3359 return false;
3360 }
3361
3362 if (!vec_stmt) /* transformation not required. */
3363 {
3364 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 3365 if (dump_enabled_p ())
e645e942
TJ
3366 dump_printf_loc (MSG_NOTE, vect_location,
3367 "=== vectorizable_shift ===\n");
c3e7ee41 3368 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
9dc3f7de
IR
3369 return true;
3370 }
3371
3372 /** Transform. **/
3373
73fbfcad 3374 if (dump_enabled_p ())
78c60e3d 3375 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3376 "transform binary/unary operation.\n");
9dc3f7de
IR
3377
3378 /* Handle def. */
3379 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3380
9dc3f7de
IR
3381 prev_stmt_info = NULL;
3382 for (j = 0; j < ncopies; j++)
3383 {
3384 /* Handle uses. */
3385 if (j == 0)
3386 {
3387 if (scalar_shift_arg)
3388 {
3389 /* Vector shl and shr insn patterns can be defined with scalar
3390 operand 2 (shift operand). In this case, use constant or loop
3391 invariant op1 directly, without extending it to vector mode
3392 first. */
3393 optab_op2_mode = insn_data[icode].operand[2].mode;
3394 if (!VECTOR_MODE_P (optab_op2_mode))
3395 {
73fbfcad 3396 if (dump_enabled_p ())
78c60e3d 3397 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3398 "operand 1 using scalar mode.\n");
9dc3f7de 3399 vec_oprnd1 = op1;
8930f723 3400 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 3401 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
3402 if (slp_node)
3403 {
3404 /* Store vec_oprnd1 for every vector stmt to be created
3405 for SLP_NODE. We check during the analysis that all
3406 the shift arguments are the same.
3407 TODO: Allow different constants for different vector
3408 stmts generated for an SLP instance. */
3409 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 3410 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
3411 }
3412 }
3413 }
3414
3415 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3416 (a special case for certain kinds of vector shifts); otherwise,
3417 operand 1 should be of a vector type (the usual case). */
3418 if (vec_oprnd1)
3419 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 3420 slp_node, -1);
9dc3f7de
IR
3421 else
3422 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 3423 slp_node, -1);
9dc3f7de
IR
3424 }
3425 else
3426 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3427
3428 /* Arguments are ready. Create the new vector stmt. */
9771b263 3429 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 3430 {
9771b263 3431 vop1 = vec_oprnds1[i];
9dc3f7de
IR
3432 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3433 new_temp = make_ssa_name (vec_dest, new_stmt);
3434 gimple_assign_set_lhs (new_stmt, new_temp);
3435 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3436 if (slp_node)
9771b263 3437 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
3438 }
3439
3440 if (slp_node)
3441 continue;
3442
3443 if (j == 0)
3444 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3445 else
3446 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3447 prev_stmt_info = vinfo_for_stmt (new_stmt);
3448 }
3449
9771b263
DN
3450 vec_oprnds0.release ();
3451 vec_oprnds1.release ();
9dc3f7de
IR
3452
3453 return true;
3454}
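/* A minimal standalone sketch, not part of the vectorizer, of the two
   shift forms distinguished above, written with GCC's generic vector
   extensions; the typedef and function names are made up for illustration.
   A loop-invariant count maps to the vector/scalar form (optab_scalar),
   a per-element count to the vector/vector form (optab_vector).  */

typedef int v4si_shift_example __attribute__ ((vector_size (16)));

static v4si_shift_example
example_shift_by_scalar (v4si_shift_example x, int amount)
{
  /* One scalar count shared by all lanes: vector/scalar shift.  */
  return x << amount;
}

static v4si_shift_example
example_shift_by_vector (v4si_shift_example x, v4si_shift_example amounts)
{
  /* A separate count per lane: vector/vector shift.  */
  return x << amounts;
}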
3455
3456
5deb57cb
JJ
3457static tree permute_vec_elements (tree, tree, tree, gimple,
3458 gimple_stmt_iterator *);
3459
3460
ebfd146a
IR
3461/* Function vectorizable_operation.
3462
16949072
RG
3463 Check if STMT performs a binary, unary or ternary operation that can
3464 be vectorized.
b8698a0f 3465 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3466 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3467 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3468
3469static bool
3470vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3471 gimple *vec_stmt, slp_tree slp_node)
3472{
00f07b86 3473 tree vec_dest;
ebfd146a 3474 tree scalar_dest;
16949072 3475 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 3476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 3477 tree vectype;
ebfd146a
IR
3478 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3479 enum tree_code code;
3480 enum machine_mode vec_mode;
3481 tree new_temp;
3482 int op_type;
00f07b86 3483 optab optab;
ebfd146a 3484 int icode;
ebfd146a
IR
3485 tree def;
3486 gimple def_stmt;
16949072
RG
3487 enum vect_def_type dt[3]
3488 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
3489 gimple new_stmt = NULL;
3490 stmt_vec_info prev_stmt_info;
b690cc0f 3491 int nunits_in;
ebfd146a
IR
3492 int nunits_out;
3493 tree vectype_out;
3494 int ncopies;
3495 int j, i;
6e1aa848
DN
3496 vec<tree> vec_oprnds0 = vNULL;
3497 vec<tree> vec_oprnds1 = vNULL;
3498 vec<tree> vec_oprnds2 = vNULL;
16949072 3499 tree vop0, vop1, vop2;
a70d6342
IR
3500 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3501 int vf;
3502
a70d6342 3503 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3504 return false;
3505
8644a673 3506 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3507 return false;
3508
3509 /* Is STMT a vectorizable unary, binary or ternary operation? */
3510 if (!is_gimple_assign (stmt))
3511 return false;
3512
3513 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3514 return false;
3515
ebfd146a
IR
3516 code = gimple_assign_rhs_code (stmt);
3517
3518 /* For pointer addition, we should use the normal plus for
3519 the vector addition. */
3520 if (code == POINTER_PLUS_EXPR)
3521 code = PLUS_EXPR;
3522
3523 /* Support only unary, binary or ternary operations. */
3524 op_type = TREE_CODE_LENGTH (code);
16949072 3525 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 3526 {
73fbfcad 3527 if (dump_enabled_p ())
78c60e3d 3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3529 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 3530 op_type);
ebfd146a
IR
3531 return false;
3532 }
3533
b690cc0f
RG
3534 scalar_dest = gimple_assign_lhs (stmt);
3535 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3536
7b7b1813
RG
3537 /* Most operations cannot handle bit-precision types without extra
3538 truncations. */
3539 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3540 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3541 /* Exceptions are bitwise binary operations. */
3542 && code != BIT_IOR_EXPR
3543 && code != BIT_XOR_EXPR
3544 && code != BIT_AND_EXPR)
3545 {
73fbfcad 3546 if (dump_enabled_p ())
78c60e3d 3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3548 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
3549 return false;
3550 }
3551
ebfd146a 3552 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3553 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 3554 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 3555 {
73fbfcad 3556 if (dump_enabled_p ())
78c60e3d 3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3558 "use not simple.\n");
ebfd146a
IR
3559 return false;
3560 }
b690cc0f
RG
3561 /* If op0 is an external or constant def use a vector type with
3562 the same size as the output vector type. */
3563 if (!vectype)
3564 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3565 if (vec_stmt)
3566 gcc_assert (vectype);
3567 if (!vectype)
3568 {
73fbfcad 3569 if (dump_enabled_p ())
7d8930a0 3570 {
78c60e3d
SS
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3572 "no vectype for scalar type ");
3573 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3574 TREE_TYPE (op0));
e645e942 3575 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3576 }
3577
3578 return false;
3579 }
b690cc0f
RG
3580
3581 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3582 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3583 if (nunits_out != nunits_in)
3584 return false;
ebfd146a 3585
16949072 3586 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
3587 {
3588 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3589 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3590 &def, &dt[1]))
ebfd146a 3591 {
73fbfcad 3592 if (dump_enabled_p ())
78c60e3d 3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3594 "use not simple.\n");
ebfd146a
IR
3595 return false;
3596 }
3597 }
16949072
RG
3598 if (op_type == ternary_op)
3599 {
3600 op2 = gimple_assign_rhs3 (stmt);
24ee1384
IR
3601 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3602 &def, &dt[2]))
16949072 3603 {
73fbfcad 3604 if (dump_enabled_p ())
78c60e3d 3605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3606 "use not simple.\n");
16949072
RG
3607 return false;
3608 }
3609 }
ebfd146a 3610
b690cc0f
RG
3611 if (loop_vinfo)
3612 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3613 else
3614 vf = 1;
3615
3616 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3617 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 3618 case of SLP. */
437f4a00 3619 if (slp_node || PURE_SLP_STMT (stmt_info))
b690cc0f
RG
3620 ncopies = 1;
3621 else
3622 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3623
3624 gcc_assert (ncopies >= 1);
3625
9dc3f7de 3626 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
3627 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3628 || code == RROTATE_EXPR)
9dc3f7de 3629 return false;
ebfd146a 3630
ebfd146a 3631 /* Supportable by target? */
00f07b86
RH
3632
3633 vec_mode = TYPE_MODE (vectype);
3634 if (code == MULT_HIGHPART_EXPR)
ebfd146a 3635 {
00f07b86 3636 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 3637 icode = LAST_INSN_CODE;
00f07b86
RH
3638 else
3639 icode = CODE_FOR_nothing;
ebfd146a 3640 }
00f07b86
RH
3641 else
3642 {
3643 optab = optab_for_tree_code (code, vectype, optab_default);
3644 if (!optab)
5deb57cb 3645 {
73fbfcad 3646 if (dump_enabled_p ())
78c60e3d 3647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3648 "no optab.\n");
00f07b86 3649 return false;
5deb57cb 3650 }
00f07b86 3651 icode = (int) optab_handler (optab, vec_mode);
5deb57cb
JJ
3652 }
3653
ebfd146a
IR
3654 if (icode == CODE_FOR_nothing)
3655 {
73fbfcad 3656 if (dump_enabled_p ())
78c60e3d 3657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3658 "op not supported by target.\n");
ebfd146a
IR
3659 /* Check only during analysis. */
3660 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 3661 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 3662 return false;
73fbfcad 3663 if (dump_enabled_p ())
e645e942
TJ
3664 dump_printf_loc (MSG_NOTE, vect_location,
3665 "proceeding using word mode.\n");
383d9c83
IR
3666 }
3667
4a00c761 3668 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
3669 if (!VECTOR_MODE_P (vec_mode)
3670 && !vec_stmt
3671 && vf < vect_min_worthwhile_factor (code))
7d8930a0 3672 {
73fbfcad 3673 if (dump_enabled_p ())
78c60e3d 3674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3675 "not worthwhile without SIMD support.\n");
e34842c6 3676 return false;
7d8930a0 3677 }
ebfd146a 3678
ebfd146a
IR
3679 if (!vec_stmt) /* transformation not required. */
3680 {
4a00c761 3681 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 3682 if (dump_enabled_p ())
78c60e3d 3683 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3684 "=== vectorizable_operation ===\n");
c3e7ee41 3685 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
3686 return true;
3687 }
3688
3689 /** Transform. **/
3690
73fbfcad 3691 if (dump_enabled_p ())
78c60e3d 3692 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3693 "transform binary/unary operation.\n");
383d9c83 3694
ebfd146a 3695 /* Handle def. */
00f07b86 3696 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 3697
ebfd146a
IR
3698 /* In case the vectorization factor (VF) is bigger than the number
3699 of elements that we can fit in a vectype (nunits), we have to generate
3700 more than one vector stmt, i.e., we need to "unroll" the
4a00c761
JJ
3701 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3702 from one copy of the vector stmt to the next, in the field
3703 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3704 stages to find the correct vector defs to be used when vectorizing
3705 stmts that use the defs of the current stmt. The example below
3706 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3707 we need to create 4 vectorized stmts):
3708
3709 before vectorization:
3710 RELATED_STMT VEC_STMT
3711 S1: x = memref - -
3712 S2: z = x + 1 - -
3713
3714 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3715 there):
3716 RELATED_STMT VEC_STMT
3717 VS1_0: vx0 = memref0 VS1_1 -
3718 VS1_1: vx1 = memref1 VS1_2 -
3719 VS1_2: vx2 = memref2 VS1_3 -
3720 VS1_3: vx3 = memref3 - -
3721 S1: x = load - VS1_0
3722 S2: z = x + 1 - -
3723
3724 step2: vectorize stmt S2 (done here):
3725 To vectorize stmt S2 we first need to find the relevant vector
3726 def for the first operand 'x'. This is, as usual, obtained from
3727 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3728 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3729 relevant vector def 'vx0'. Having found 'vx0' we can generate
3730 the vector stmt VS2_0, and as usual, record it in the
3731 STMT_VINFO_VEC_STMT of stmt S2.
3732 When creating the second copy (VS2_1), we obtain the relevant vector
3733 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3734 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3735 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3736 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3737 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3738 chain of stmts and pointers:
3739 RELATED_STMT VEC_STMT
3740 VS1_0: vx0 = memref0 VS1_1 -
3741 VS1_1: vx1 = memref1 VS1_2 -
3742 VS1_2: vx2 = memref2 VS1_3 -
3743 VS1_3: vx3 = memref3 - -
3744 S1: x = load - VS1_0
3745 VS2_0: vz0 = vx0 + v1 VS2_1 -
3746 VS2_1: vz1 = vx1 + v1 VS2_2 -
3747 VS2_2: vz2 = vx2 + v1 VS2_3 -
3748 VS2_3: vz3 = vx3 + v1 - -
3749 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
3750
3751 prev_stmt_info = NULL;
3752 for (j = 0; j < ncopies; j++)
3753 {
3754 /* Handle uses. */
3755 if (j == 0)
4a00c761
JJ
3756 {
3757 if (op_type == binary_op || op_type == ternary_op)
3758 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3759 slp_node, -1);
3760 else
3761 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3762 slp_node, -1);
3763 if (op_type == ternary_op)
36ba4aae 3764 {
9771b263
DN
3765 vec_oprnds2.create (1);
3766 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3767 stmt,
3768 NULL));
36ba4aae 3769 }
4a00c761 3770 }
ebfd146a 3771 else
4a00c761
JJ
3772 {
3773 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3774 if (op_type == ternary_op)
3775 {
9771b263
DN
3776 tree vec_oprnd = vec_oprnds2.pop ();
3777 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3778 vec_oprnd));
4a00c761
JJ
3779 }
3780 }
3781
3782 /* Arguments are ready. Create the new vector stmt. */
9771b263 3783 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 3784 {
4a00c761 3785 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 3786 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 3787 vop2 = ((op_type == ternary_op)
9771b263 3788 ? vec_oprnds2[i] : NULL_TREE);
73804b12
RG
3789 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3790 vop0, vop1, vop2);
4a00c761
JJ
3791 new_temp = make_ssa_name (vec_dest, new_stmt);
3792 gimple_assign_set_lhs (new_stmt, new_temp);
3793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3794 if (slp_node)
9771b263 3795 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
3796 }
3797
4a00c761
JJ
3798 if (slp_node)
3799 continue;
3800
3801 if (j == 0)
3802 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3803 else
3804 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3805 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
3806 }
3807
9771b263
DN
3808 vec_oprnds0.release ();
3809 vec_oprnds1.release ();
3810 vec_oprnds2.release ();
ebfd146a 3811
ebfd146a
IR
3812 return true;
3813}
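/* A standalone sketch of the ncopies unrolling described in the comment
   inside vectorizable_operation, using the VF = 16, nunits = 4 numbers
   from that example; written with GCC's generic vector extensions, and
   all names below are made up.  One scalar statement S2: z = x + 1
   becomes the four chained vector statements VS2_0..VS2_3.  */

typedef int v4si_copy_example __attribute__ ((vector_size (16)));

static void
example_four_copies (int *z, const int *x)
{
  v4si_copy_example vx0, vx1, vx2, vx3;
  __builtin_memcpy (&vx0, x + 0, sizeof vx0);   /* VS1_0 */
  __builtin_memcpy (&vx1, x + 4, sizeof vx1);   /* VS1_1 */
  __builtin_memcpy (&vx2, x + 8, sizeof vx2);   /* VS1_2 */
  __builtin_memcpy (&vx3, x + 12, sizeof vx3);  /* VS1_3 */
  v4si_copy_example v1 = { 1, 1, 1, 1 };
  v4si_copy_example vz0 = vx0 + v1;             /* VS2_0 */
  v4si_copy_example vz1 = vx1 + v1;             /* VS2_1 */
  v4si_copy_example vz2 = vx2 + v1;             /* VS2_2 */
  v4si_copy_example vz3 = vx3 + v1;             /* VS2_3 */
  __builtin_memcpy (z + 0, &vz0, sizeof vz0);
  __builtin_memcpy (z + 4, &vz1, sizeof vz1);
  __builtin_memcpy (z + 8, &vz2, sizeof vz2);
  __builtin_memcpy (z + 12, &vz3, sizeof vz3);
}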
3814
c716e67f
XDL
3815/* A helper function to ensure data reference DR's base alignment
3816 for STMT_INFO. */
3817
3818static void
3819ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3820{
3821 if (!dr->aux)
3822 return;
3823
3824 if (((dataref_aux *)dr->aux)->base_misaligned)
3825 {
3826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3827 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3828
3829 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3830 DECL_USER_ALIGN (base_decl) = 1;
3831 ((dataref_aux *)dr->aux)->base_misaligned = false;
3832 }
3833}
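/* A standalone sketch of what ensure_base_align achieves; the array and
   function names are invented for the example.  Raising DECL_ALIGN on the
   base decl is the internal equivalent of the explicit attribute below:
   once the base object is known to be vector-aligned, vector accesses to
   it need no runtime realignment.  */

typedef int v4si_align_example __attribute__ ((vector_size (16)));

static int example_base[64] __attribute__ ((aligned (16)));

static v4si_align_example
example_aligned_load (void)
{
  /* Well-defined here because example_base is 16-byte aligned.  */
  return *(const v4si_align_example *) example_base;
}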
3834
ebfd146a
IR
3835
3836/* Function vectorizable_store.
3837
b8698a0f
L
3838 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3839 can be vectorized.
3840 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3841 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3842 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3843
3844static bool
3845vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 3846 slp_tree slp_node)
ebfd146a
IR
3847{
3848 tree scalar_dest;
3849 tree data_ref;
3850 tree op;
3851 tree vec_oprnd = NULL_TREE;
3852 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3853 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3855 tree elem_type;
ebfd146a 3856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3857 struct loop *loop = NULL;
ebfd146a
IR
3858 enum machine_mode vec_mode;
3859 tree dummy;
3860 enum dr_alignment_support alignment_support_scheme;
3861 tree def;
3862 gimple def_stmt;
3863 enum vect_def_type dt;
3864 stmt_vec_info prev_stmt_info = NULL;
3865 tree dataref_ptr = NULL_TREE;
74bf76ed 3866 tree dataref_offset = NULL_TREE;
fef4d2b3 3867 gimple ptr_incr = NULL;
ebfd146a
IR
3868 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3869 int ncopies;
3870 int j;
3871 gimple next_stmt, first_stmt = NULL;
0d0293ac 3872 bool grouped_store = false;
272c6793 3873 bool store_lanes_p = false;
ebfd146a 3874 unsigned int group_size, i;
6e1aa848
DN
3875 vec<tree> dr_chain = vNULL;
3876 vec<tree> oprnds = vNULL;
3877 vec<tree> result_chain = vNULL;
ebfd146a 3878 bool inv_p;
6e1aa848 3879 vec<tree> vec_oprnds = vNULL;
ebfd146a 3880 bool slp = (slp_node != NULL);
ebfd146a 3881 unsigned int vec_num;
a70d6342 3882 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3883 tree aggr_type;
a70d6342
IR
3884
3885 if (loop_vinfo)
3886 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
3887
3888 /* Multiple types in SLP are handled by creating the appropriate number of
3889 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3890 case of SLP. */
437f4a00 3891 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
3892 ncopies = 1;
3893 else
3894 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3895
3896 gcc_assert (ncopies >= 1);
3897
3898 /* FORNOW. This restriction should be relaxed. */
a70d6342 3899 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 3900 {
73fbfcad 3901 if (dump_enabled_p ())
78c60e3d 3902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3903 "multiple types in nested loop.\n");
ebfd146a
IR
3904 return false;
3905 }
3906
a70d6342 3907 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3908 return false;
3909
8644a673 3910 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3911 return false;
3912
3913 /* Is vectorizable store? */
3914
3915 if (!is_gimple_assign (stmt))
3916 return false;
3917
3918 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
3919 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3920 && is_pattern_stmt_p (stmt_info))
3921 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 3922 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 3923 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 3924 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
3925 && TREE_CODE (scalar_dest) != COMPONENT_REF
3926 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
3927 && TREE_CODE (scalar_dest) != REALPART_EXPR
3928 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
3929 return false;
3930
3931 gcc_assert (gimple_assign_single_p (stmt));
3932 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
3933 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3934 &def, &dt))
ebfd146a 3935 {
73fbfcad 3936 if (dump_enabled_p ())
78c60e3d 3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3938 "use not simple.\n");
ebfd146a
IR
3939 return false;
3940 }
3941
272c6793 3942 elem_type = TREE_TYPE (vectype);
ebfd146a 3943 vec_mode = TYPE_MODE (vectype);
7b7b1813 3944
ebfd146a
IR
3945 /* FORNOW. In some cases can vectorize even if data-type not supported
3946 (e.g., array initialization with 0). */
947131ba 3947 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
3948 return false;
3949
3950 if (!STMT_VINFO_DATA_REF (stmt_info))
3951 return false;
3952
a7ce6ec3
RG
3953 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3954 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3955 size_zero_node) < 0)
a1e53f3f 3956 {
73fbfcad 3957 if (dump_enabled_p ())
78c60e3d 3958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3959 "negative step for store.\n");
a1e53f3f
L
3960 return false;
3961 }
3962
0d0293ac 3963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 3964 {
0d0293ac 3965 grouped_store = true;
e14c1050 3966 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
3967 if (!slp && !PURE_SLP_STMT (stmt_info))
3968 {
e14c1050 3969 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
3970 if (vect_store_lanes_supported (vectype, group_size))
3971 store_lanes_p = true;
0d0293ac 3972 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
3973 return false;
3974 }
b8698a0f 3975
ebfd146a
IR
3976 if (first_stmt == stmt)
3977 {
3978 /* STMT is the leader of the group. Check the operands of all the
3979 stmts of the group. */
e14c1050 3980 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
3981 while (next_stmt)
3982 {
3983 gcc_assert (gimple_assign_single_p (next_stmt));
3984 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
3985 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3986 &def_stmt, &def, &dt))
ebfd146a 3987 {
73fbfcad 3988 if (dump_enabled_p ())
78c60e3d 3989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3990 "use not simple.\n");
ebfd146a
IR
3991 return false;
3992 }
e14c1050 3993 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
3994 }
3995 }
3996 }
3997
3998 if (!vec_stmt) /* transformation not required. */
3999 {
4000 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
4001 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4002 NULL, NULL, NULL);
ebfd146a
IR
4003 return true;
4004 }
4005
4006 /** Transform. **/
4007
c716e67f
XDL
4008 ensure_base_align (stmt_info, dr);
4009
0d0293ac 4010 if (grouped_store)
ebfd146a
IR
4011 {
4012 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4013 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 4014
e14c1050 4015 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
4016
4017 /* FORNOW */
a70d6342 4018 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
4019
4020 /* We vectorize all the stmts of the interleaving group when we
4021 reach the last stmt in the group. */
e14c1050
IR
4022 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4023 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
4024 && !slp)
4025 {
4026 *vec_stmt = NULL;
4027 return true;
4028 }
4029
4030 if (slp)
4b5caab7 4031 {
0d0293ac 4032 grouped_store = false;
4b5caab7
IR
4033 /* VEC_NUM is the number of vect stmts to be created for this
4034 group. */
4035 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 4036 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 4037 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 4038 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 4039 }
ebfd146a 4040 else
4b5caab7
IR
4041 /* VEC_NUM is the number of vect stmts to be created for this
4042 group. */
ebfd146a
IR
4043 vec_num = group_size;
4044 }
b8698a0f 4045 else
ebfd146a
IR
4046 {
4047 first_stmt = stmt;
4048 first_dr = dr;
4049 group_size = vec_num = 1;
ebfd146a 4050 }
b8698a0f 4051
73fbfcad 4052 if (dump_enabled_p ())
78c60e3d 4053 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4054 "transform store. ncopies = %d\n", ncopies);
ebfd146a 4055
9771b263
DN
4056 dr_chain.create (group_size);
4057 oprnds.create (group_size);
ebfd146a 4058
720f5239 4059 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4060 gcc_assert (alignment_support_scheme);
272c6793
RS
4061 /* Targets with store-lane instructions must not require explicit
4062 realignment. */
4063 gcc_assert (!store_lanes_p
4064 || alignment_support_scheme == dr_aligned
4065 || alignment_support_scheme == dr_unaligned_supported);
4066
4067 if (store_lanes_p)
4068 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4069 else
4070 aggr_type = vectype;
ebfd146a
IR
4071
4072 /* In case the vectorization factor (VF) is bigger than the number
4073 of elements that we can fit in a vectype (nunits), we have to generate
4074 more than one vector stmt, i.e., we need to "unroll" the
b8698a0f 4075 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
4076 vect_get_vec_def_for_copy_stmt. */
4077
0d0293ac 4078 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4079
4080 S1: &base + 2 = x2
4081 S2: &base = x0
4082 S3: &base + 1 = x1
4083 S4: &base + 3 = x3
4084
4085 We create vectorized stores starting from the base address (the access of
4086 the first stmt in the chain, S2 in the above example) when the last store
4087 stmt of the chain (S4) is reached:
4088
4089 VS1: &base = vx2
4090 VS2: &base + vec_size*1 = vx0
4091 VS3: &base + vec_size*2 = vx1
4092 VS4: &base + vec_size*3 = vx3
4093
4094 Then permutation statements are generated:
4095
3fcc1b55
JJ
4096 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4097 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 4098 ...
b8698a0f 4099
ebfd146a
IR
4100 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4101 (the order of the data-refs in the output of vect_permute_store_chain
4102 corresponds to the order of scalar stmts in the interleaving chain - see
4103 the documentation of vect_permute_store_chain()).
4104
4105 In case of both multiple types and interleaving, the above vector stores and
ff802fa1 4106 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 4107 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 4108 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
4109 */
4110
4111 prev_stmt_info = NULL;
4112 for (j = 0; j < ncopies; j++)
4113 {
4114 gimple new_stmt;
ebfd146a
IR
4115
4116 if (j == 0)
4117 {
4118 if (slp)
4119 {
4120 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
4121 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4122 NULL, slp_node, -1);
ebfd146a 4123
9771b263 4124 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
4125 }
4126 else
4127 {
b8698a0f
L
4128 /* For interleaved stores we collect vectorized defs for all the
4129 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4130 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
4131 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4132
0d0293ac 4133 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 4134 OPRNDS are of size 1. */
b8698a0f 4135 next_stmt = first_stmt;
ebfd146a
IR
4136 for (i = 0; i < group_size; i++)
4137 {
b8698a0f
L
4138 /* Since gaps are not supported for interleaved stores,
4139 GROUP_SIZE is the exact number of stmts in the chain.
4140 Therefore, NEXT_STMT can't be NULL_TREE. If there is no
4141 interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
4142 iteration of the loop will be executed. */
4143 gcc_assert (next_stmt
4144 && gimple_assign_single_p (next_stmt));
4145 op = gimple_assign_rhs1 (next_stmt);
4146
b8698a0f 4147 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 4148 NULL);
9771b263
DN
4149 dr_chain.quick_push (vec_oprnd);
4150 oprnds.quick_push (vec_oprnd);
e14c1050 4151 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
4152 }
4153 }
4154
4155 /* We should have caught mismatched types earlier. */
4156 gcc_assert (useless_type_conversion_p (vectype,
4157 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
4158 bool simd_lane_access_p
4159 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4160 if (simd_lane_access_p
4161 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4162 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4163 && integer_zerop (DR_OFFSET (first_dr))
4164 && integer_zerop (DR_INIT (first_dr))
4165 && alias_sets_conflict_p (get_alias_set (aggr_type),
4166 get_alias_set (DR_REF (first_dr))))
4167 {
4168 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4169 dataref_offset = build_int_cst (reference_alias_ptr_type
4170 (DR_REF (first_dr)), 0);
8928eff3 4171 inv_p = false;
74bf76ed
JJ
4172 }
4173 else
4174 dataref_ptr
4175 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4176 simd_lane_access_p ? loop : NULL,
4177 NULL_TREE, &dummy, gsi, &ptr_incr,
4178 simd_lane_access_p, &inv_p);
a70d6342 4179 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 4180 }
b8698a0f 4181 else
ebfd146a 4182 {
b8698a0f
L
4183 /* For interleaved stores we created vectorized defs for all the
4184 defs stored in OPRNDS in the previous iteration (previous copy).
4185 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
4186 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4187 next copy.
0d0293ac 4188 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
4189 OPRNDS are of size 1. */
4190 for (i = 0; i < group_size; i++)
4191 {
9771b263 4192 op = oprnds[i];
24ee1384
IR
4193 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4194 &def, &dt);
b8698a0f 4195 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
4196 dr_chain[i] = vec_oprnd;
4197 oprnds[i] = vec_oprnd;
ebfd146a 4198 }
74bf76ed
JJ
4199 if (dataref_offset)
4200 dataref_offset
4201 = int_const_binop (PLUS_EXPR, dataref_offset,
4202 TYPE_SIZE_UNIT (aggr_type));
4203 else
4204 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4205 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
4206 }
4207
272c6793 4208 if (store_lanes_p)
ebfd146a 4209 {
272c6793 4210 tree vec_array;
267d3070 4211
272c6793
RS
4212 /* Combine all the vectors into an array. */
4213 vec_array = create_vector_array (vectype, vec_num);
4214 for (i = 0; i < vec_num; i++)
c2d7ab2a 4215 {
9771b263 4216 vec_oprnd = dr_chain[i];
272c6793 4217 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 4218 }
b8698a0f 4219
272c6793
RS
4220 /* Emit:
4221 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4222 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4223 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4224 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 4225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4226 }
4227 else
4228 {
4229 new_stmt = NULL;
0d0293ac 4230 if (grouped_store)
272c6793 4231 {
b6b9227d
JJ
4232 if (j == 0)
4233 result_chain.create (group_size);
272c6793
RS
4234 /* Permute. */
4235 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4236 &result_chain);
4237 }
c2d7ab2a 4238
272c6793
RS
4239 next_stmt = first_stmt;
4240 for (i = 0; i < vec_num; i++)
4241 {
644ffefd 4242 unsigned align, misalign;
272c6793
RS
4243
4244 if (i > 0)
4245 /* Bump the vector pointer. */
4246 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4247 stmt, NULL_TREE);
4248
4249 if (slp)
9771b263 4250 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
4251 else if (grouped_store)
4252 /* For grouped stores vectorized defs are interleaved in
272c6793 4253 vect_permute_store_chain(). */
9771b263 4254 vec_oprnd = result_chain[i];
272c6793
RS
4255
4256 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
74bf76ed
JJ
4257 dataref_offset
4258 ? dataref_offset
4259 : build_int_cst (reference_alias_ptr_type
4260 (DR_REF (first_dr)), 0));
644ffefd 4261 align = TYPE_ALIGN_UNIT (vectype);
272c6793 4262 if (aligned_access_p (first_dr))
644ffefd 4263 misalign = 0;
272c6793
RS
4264 else if (DR_MISALIGNMENT (first_dr) == -1)
4265 {
4266 TREE_TYPE (data_ref)
4267 = build_aligned_type (TREE_TYPE (data_ref),
4268 TYPE_ALIGN (elem_type));
644ffefd
MJ
4269 align = TYPE_ALIGN_UNIT (elem_type);
4270 misalign = 0;
272c6793
RS
4271 }
4272 else
4273 {
4274 TREE_TYPE (data_ref)
4275 = build_aligned_type (TREE_TYPE (data_ref),
4276 TYPE_ALIGN (elem_type));
644ffefd 4277 misalign = DR_MISALIGNMENT (first_dr);
272c6793 4278 }
74bf76ed
JJ
4279 if (dataref_offset == NULL_TREE)
4280 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4281 misalign);
c2d7ab2a 4282
272c6793
RS
4283 /* Arguments are ready. Create the new vector stmt. */
4284 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
4286
4287 if (slp)
4288 continue;
4289
e14c1050 4290 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
4291 if (!next_stmt)
4292 break;
4293 }
ebfd146a 4294 }
1da0876c
RS
4295 if (!slp)
4296 {
4297 if (j == 0)
4298 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4299 else
4300 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4301 prev_stmt_info = vinfo_for_stmt (new_stmt);
4302 }
ebfd146a
IR
4303 }
4304
9771b263
DN
4305 dr_chain.release ();
4306 oprnds.release ();
4307 result_chain.release ();
4308 vec_oprnds.release ();
ebfd146a
IR
4309
4310 return true;
4311}
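/* A standalone sketch of the interleaving permutes that
   vect_permute_store_chain emits for a grouped store, scaled down to two
   4-element vectors (names invented for the example).  The selectors
   { 0, 4, 1, 5 } and { 2, 6, 3, 7 } are the 4-lane analogues of the
   { 0, 8, 1, 9, ... } and { 4, 12, 5, 13, ... } masks in the comment
   inside vectorizable_store; __builtin_shuffle is GCC's source-level
   form of VEC_PERM_EXPR.  */

typedef int v4si_ileave_example __attribute__ ((vector_size (16)));

static void
example_interleave_store (v4si_ileave_example va, v4si_ileave_example vb,
                          int *out)
{
  v4si_ileave_example lo_sel = { 0, 4, 1, 5 };
  v4si_ileave_example hi_sel = { 2, 6, 3, 7 };
  v4si_ileave_example lo = __builtin_shuffle (va, vb, lo_sel);
  v4si_ileave_example hi = __builtin_shuffle (va, vb, hi_sel);
  __builtin_memcpy (out + 0, &lo, sizeof lo);   /* a0 b0 a1 b1 */
  __builtin_memcpy (out + 4, &hi, sizeof hi);   /* a2 b2 a3 b3 */
}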
4312
aec7ae7d
JJ
4313 /* Given a vector type VECTYPE and a permutation SEL, returns
4314 the VECTOR_CST mask that implements the permutation of the
4315 vector elements. If that is impossible to do, returns NULL. */
a1e53f3f 4316
3fcc1b55
JJ
4317tree
4318vect_gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 4319{
d2a12ae7 4320 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 4321 int i, nunits;
a1e53f3f 4322
22e4dee7 4323 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7
RH
4324
4325 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
a1e53f3f
L
4326 return NULL;
4327
96f9265a
RG
4328 mask_elt_type = lang_hooks.types.type_for_mode
4329 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 4330 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 4331
d2a12ae7 4332 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 4333 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
4334 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4335 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 4336
2635892a 4337 return mask_vec;
a1e53f3f
L
4338}
4339
aec7ae7d
JJ
4340 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4341 reversal of the vector elements. If that is impossible to do,
4342 returns NULL. */
4343
4344static tree
4345perm_mask_for_reverse (tree vectype)
4346{
4347 int i, nunits;
4348 unsigned char *sel;
4349
4350 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4351 sel = XALLOCAVEC (unsigned char, nunits);
4352
4353 for (i = 0; i < nunits; ++i)
4354 sel[i] = nunits - 1 - i;
4355
3fcc1b55 4356 return vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4357}
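/* A minimal sketch of the selector built by perm_mask_for_reverse: for
   four lanes, sel = { 3, 2, 1, 0 }.  Example names only; the single-input
   form of __builtin_shuffle corresponds to a VEC_PERM_EXPR with both
   operands the same.  */

typedef int v4si_rev_example __attribute__ ((vector_size (16)));

static v4si_rev_example
example_reverse (v4si_rev_example x)
{
  v4si_rev_example sel = { 3, 2, 1, 0 };
  return __builtin_shuffle (x, sel);    /* { x[3], x[2], x[1], x[0] } */
}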
4358
4359 /* Given vector variables X and Y that were generated for the scalar
4360 STMT, generate instructions to permute the vector elements of X and Y
4361 using permutation mask MASK_VEC, insert them at *GSI and return the
4362 permuted vector variable. */
a1e53f3f
L
4363
4364static tree
aec7ae7d
JJ
4365permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4366 gimple_stmt_iterator *gsi)
a1e53f3f
L
4367{
4368 tree vectype = TREE_TYPE (x);
aec7ae7d 4369 tree perm_dest, data_ref;
a1e53f3f
L
4370 gimple perm_stmt;
4371
a1e53f3f 4372 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
aec7ae7d 4373 data_ref = make_ssa_name (perm_dest, NULL);
a1e53f3f
L
4374
4375 /* Generate the permute statement. */
73804b12
RG
4376 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4377 x, y, mask_vec);
a1e53f3f
L
4378 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4379
4380 return data_ref;
4381}
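/* A scalar reference model (hypothetical helper, for illustration only)
   of the two-input VEC_PERM_EXPR built by permute_vec_elements: selector
   values 0..N-1 pick from X and N..2N-1 pick from Y.  */

static void
example_vec_perm_ref (const int *x, const int *y, const unsigned char *sel,
                      int *r, int n)
{
  int i;
  for (i = 0; i < n; i++)
    r[i] = sel[i] < n ? x[sel[i]] : y[sel[i] - n];
}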
4382
ebfd146a
IR
4383/* vectorizable_load.
4384
b8698a0f
L
4385 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4386 can be vectorized.
4387 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4388 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4389 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4390
4391static bool
4392vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 4393 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
4394{
4395 tree scalar_dest;
4396 tree vec_dest = NULL;
4397 tree data_ref = NULL;
4398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4399 stmt_vec_info prev_stmt_info;
ebfd146a 4400 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4401 struct loop *loop = NULL;
ebfd146a 4402 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4403 bool nested_in_vect_loop = false;
c716e67f 4404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 4405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4406 tree elem_type;
ebfd146a 4407 tree new_temp;
947131ba 4408 enum machine_mode mode;
ebfd146a
IR
4409 gimple new_stmt = NULL;
4410 tree dummy;
4411 enum dr_alignment_support alignment_support_scheme;
4412 tree dataref_ptr = NULL_TREE;
74bf76ed 4413 tree dataref_offset = NULL_TREE;
fef4d2b3 4414 gimple ptr_incr = NULL;
ebfd146a
IR
4415 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4416 int ncopies;
a64b9c26 4417 int i, j, group_size, group_gap;
ebfd146a
IR
4418 tree msq = NULL_TREE, lsq;
4419 tree offset = NULL_TREE;
4420 tree realignment_token = NULL_TREE;
4421 gimple phi = NULL;
6e1aa848 4422 vec<tree> dr_chain = vNULL;
0d0293ac 4423 bool grouped_load = false;
272c6793 4424 bool load_lanes_p = false;
ebfd146a 4425 gimple first_stmt;
ebfd146a 4426 bool inv_p;
319e6439 4427 bool negative = false;
ebfd146a
IR
4428 bool compute_in_loop = false;
4429 struct loop *at_loop;
4430 int vec_num;
4431 bool slp = (slp_node != NULL);
4432 bool slp_perm = false;
4433 enum tree_code code;
a70d6342
IR
4434 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4435 int vf;
272c6793 4436 tree aggr_type;
aec7ae7d
JJ
4437 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4438 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4439 int gather_scale = 1;
4440 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
4441
4442 if (loop_vinfo)
4443 {
4444 loop = LOOP_VINFO_LOOP (loop_vinfo);
4445 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4446 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4447 }
4448 else
3533e503 4449 vf = 1;
ebfd146a
IR
4450
4451 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4453 case of SLP. */
437f4a00 4454 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
4455 ncopies = 1;
4456 else
4457 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4458
4459 gcc_assert (ncopies >= 1);
4460
4461 /* FORNOW. This restriction should be relaxed. */
4462 if (nested_in_vect_loop && ncopies > 1)
4463 {
73fbfcad 4464 if (dump_enabled_p ())
78c60e3d 4465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4466 "multiple types in nested loop.\n");
ebfd146a
IR
4467 return false;
4468 }
4469
a70d6342 4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4471 return false;
4472
8644a673 4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
4474 return false;
4475
4476 /* Is vectorizable load? */
4477 if (!is_gimple_assign (stmt))
4478 return false;
4479
4480 scalar_dest = gimple_assign_lhs (stmt);
4481 if (TREE_CODE (scalar_dest) != SSA_NAME)
4482 return false;
4483
4484 code = gimple_assign_rhs_code (stmt);
4485 if (code != ARRAY_REF
38000232 4486 && code != BIT_FIELD_REF
ebfd146a 4487 && code != INDIRECT_REF
e9dbe7bb
IR
4488 && code != COMPONENT_REF
4489 && code != IMAGPART_EXPR
70f34814 4490 && code != REALPART_EXPR
42373e0b
RG
4491 && code != MEM_REF
4492 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
4493 return false;
4494
4495 if (!STMT_VINFO_DATA_REF (stmt_info))
4496 return false;
4497
7b7b1813 4498 elem_type = TREE_TYPE (vectype);
947131ba 4499 mode = TYPE_MODE (vectype);
ebfd146a
IR
4500
4501 /* FORNOW. In some cases can vectorize even if data-type not supported
4502 (e.g., data copies). */
947131ba 4503 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 4504 {
73fbfcad 4505 if (dump_enabled_p ())
78c60e3d 4506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4507 "Aligned load, but unsupported type.\n");
ebfd146a
IR
4508 return false;
4509 }
4510
ebfd146a 4511 /* Check if the load is a part of an interleaving chain. */
0d0293ac 4512 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 4513 {
0d0293ac 4514 grouped_load = true;
ebfd146a 4515 /* FORNOW */
aec7ae7d 4516 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 4517
e14c1050 4518 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
4519 if (!slp && !PURE_SLP_STMT (stmt_info))
4520 {
e14c1050 4521 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
4522 if (vect_load_lanes_supported (vectype, group_size))
4523 load_lanes_p = true;
0d0293ac 4524 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
4525 return false;
4526 }
ebfd146a
IR
4527 }
4528
a1e53f3f 4529
aec7ae7d
JJ
4530 if (STMT_VINFO_GATHER_P (stmt_info))
4531 {
4532 gimple def_stmt;
4533 tree def;
4534 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4535 &gather_off, &gather_scale);
4536 gcc_assert (gather_decl);
24ee1384 4537 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
4538 &def_stmt, &def, &gather_dt,
4539 &gather_off_vectype))
4540 {
73fbfcad 4541 if (dump_enabled_p ())
78c60e3d 4542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4543 "gather index use not simple.\n");
aec7ae7d
JJ
4544 return false;
4545 }
4546 }
7d75abc8 4547 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
14ac6aa2 4548 ;
319e6439
RG
4549 else
4550 {
4551 negative = tree_int_cst_compare (nested_in_vect_loop
4552 ? STMT_VINFO_DR_STEP (stmt_info)
4553 : DR_STEP (dr),
4554 size_zero_node) < 0;
4555 if (negative && ncopies > 1)
4556 {
73fbfcad 4557 if (dump_enabled_p ())
78c60e3d 4558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4559 "multiple types with negative step.\n");
319e6439
RG
4560 return false;
4561 }
4562
4563 if (negative)
4564 {
08940f33
RB
4565 if (grouped_load)
4566 {
4567 if (dump_enabled_p ())
4568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4569 "negative step for group load not supported"
4570 "\n");
08940f33
RB
4571 return false;
4572 }
319e6439
RG
4573 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4574 if (alignment_support_scheme != dr_aligned
4575 && alignment_support_scheme != dr_unaligned_supported)
4576 {
73fbfcad 4577 if (dump_enabled_p ())
78c60e3d 4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4579 "negative step but alignment required.\n");
319e6439
RG
4580 return false;
4581 }
4582 if (!perm_mask_for_reverse (vectype))
4583 {
73fbfcad 4584 if (dump_enabled_p ())
78c60e3d 4585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4586 "negative step and reversing not supported."
4587 "\n");
319e6439
RG
4588 return false;
4589 }
4590 }
7d75abc8 4591 }
aec7ae7d 4592
ebfd146a
IR
4593 if (!vec_stmt) /* transformation not required. */
4594 {
4595 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 4596 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
ebfd146a
IR
4597 return true;
4598 }
4599
73fbfcad 4600 if (dump_enabled_p ())
78c60e3d 4601 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4602 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
4603
4604 /** Transform. **/
4605
c716e67f
XDL
4606 ensure_base_align (stmt_info, dr);
4607
aec7ae7d
JJ
4608 if (STMT_VINFO_GATHER_P (stmt_info))
4609 {
4610 tree vec_oprnd0 = NULL_TREE, op;
4611 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4612 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4613 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4614 edge pe = loop_preheader_edge (loop);
4615 gimple_seq seq;
4616 basic_block new_bb;
4617 enum { NARROW, NONE, WIDEN } modifier;
4618 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4619
4620 if (nunits == gather_off_nunits)
4621 modifier = NONE;
4622 else if (nunits == gather_off_nunits / 2)
4623 {
4624 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4625 modifier = WIDEN;
4626
4627 for (i = 0; i < gather_off_nunits; ++i)
4628 sel[i] = i | nunits;
4629
3fcc1b55 4630 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
aec7ae7d
JJ
4631 gcc_assert (perm_mask != NULL_TREE);
4632 }
4633 else if (nunits == gather_off_nunits * 2)
4634 {
4635 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4636 modifier = NARROW;
4637
4638 for (i = 0; i < nunits; ++i)
4639 sel[i] = i < gather_off_nunits
4640 ? i : i + nunits - gather_off_nunits;
4641
3fcc1b55 4642 perm_mask = vect_gen_perm_mask (vectype, sel);
aec7ae7d
JJ
4643 gcc_assert (perm_mask != NULL_TREE);
4644 ncopies *= 2;
4645 }
4646 else
4647 gcc_unreachable ();
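  /* Worked example (illustration, not code): element-wise, a gather
     computes result[i] = *(base + offset[i] * scale).  If the data
     vector has nunits = 4 but the offset vector has gather_off_nunits = 8
     (WIDEN), sel[i] = i | nunits yields { 4, 5, 6, 7, 4, 5, 6, 7 }, which
     selects the high half of the offset vector for every odd-numbered
     copy.  If instead nunits = 8 and gather_off_nunits = 4 (NARROW), sel
     becomes { 0, 1, 2, 3, 8, 9, 10, 11 }, which combines the low halves
     of two successive gather results into one full data vector, and
     ncopies is doubled to compensate.  */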
4648
4649 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4650 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4651 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4652 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4653 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4654 scaletype = TREE_VALUE (arglist);
4655 gcc_checking_assert (types_compatible_p (srctype, rettype)
4656 && types_compatible_p (srctype, masktype));
4657
4658 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4659
4660 ptr = fold_convert (ptrtype, gather_base);
4661 if (!is_gimple_min_invariant (ptr))
4662 {
4663 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4664 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4665 gcc_assert (!new_bb);
4666 }
4667
4668 /* Currently we support only unconditional gather loads,
4669 so mask should be all ones. */
4670 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4671 mask = build_int_cst (TREE_TYPE (masktype), -1);
4672 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4673 {
4674 REAL_VALUE_TYPE r;
4675 long tmp[6];
4676 for (j = 0; j < 6; ++j)
4677 tmp[j] = -1;
4678 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4679 mask = build_real (TREE_TYPE (masktype), r);
4680 }
4681 else
4682 gcc_unreachable ();
4683 mask = build_vector_from_val (masktype, mask);
4684 mask = vect_init_vector (stmt, mask, masktype, NULL);
4685
4686 scale = build_int_cst (scaletype, gather_scale);
4687
4688 prev_stmt_info = NULL;
4689 for (j = 0; j < ncopies; ++j)
4690 {
4691 if (modifier == WIDEN && (j & 1))
4692 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4693 perm_mask, stmt, gsi);
4694 else if (j == 0)
4695 op = vec_oprnd0
4696 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4697 else
4698 op = vec_oprnd0
4699 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4700
4701 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4702 {
4703 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4704 == TYPE_VECTOR_SUBPARTS (idxtype));
4705 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
4706 var = make_ssa_name (var, NULL);
4707 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4708 new_stmt
4709 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4710 op, NULL_TREE);
4711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4712 op = var;
4713 }
4714
4715 new_stmt
4716 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4717
4718 if (!useless_type_conversion_p (vectype, rettype))
4719 {
4720 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4721 == TYPE_VECTOR_SUBPARTS (rettype));
4722 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
4723 op = make_ssa_name (var, new_stmt);
4724 gimple_call_set_lhs (new_stmt, op);
4725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4726 var = make_ssa_name (vec_dest, NULL);
4727 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4728 new_stmt
4729 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4730 NULL_TREE);
4731 }
4732 else
4733 {
4734 var = make_ssa_name (vec_dest, new_stmt);
4735 gimple_call_set_lhs (new_stmt, var);
4736 }
4737
4738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4739
4740 if (modifier == NARROW)
4741 {
4742 if ((j & 1) == 0)
4743 {
4744 prev_res = var;
4745 continue;
4746 }
4747 var = permute_vec_elements (prev_res, var,
4748 perm_mask, stmt, gsi);
4749 new_stmt = SSA_NAME_DEF_STMT (var);
4750 }
4751
4752 if (prev_stmt_info == NULL)
4753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4754 else
4755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4756 prev_stmt_info = vinfo_for_stmt (new_stmt);
4757 }
4758 return true;
4759 }
7d75abc8
MM
4760 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4761 {
4762 gimple_stmt_iterator incr_gsi;
4763 bool insert_after;
4764 gimple incr;
4765 tree offvar;
7d75abc8
MM
4766 tree ivstep;
4767 tree running_off;
9771b263 4768 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 4769 gimple_seq stmts = NULL;
14ac6aa2
RB
4770 tree stride_base, stride_step, alias_off;
4771
4772 gcc_assert (!nested_in_vect_loop);
7d75abc8 4773
14ac6aa2
RB
4774 stride_base
4775 = fold_build_pointer_plus
4776 (unshare_expr (DR_BASE_ADDRESS (dr)),
4777 size_binop (PLUS_EXPR,
4778 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 4779 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 4780 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
4781
4782 /* For a load with loop-invariant (but other than power-of-2)
4783 stride (i.e. not a grouped access) like so:
4784
4785 for (i = 0; i < n; i += stride)
4786 ... = array[i];
4787
4788 we generate a new induction variable and new accesses to
4789 form a new vector (or vectors, depending on ncopies):
4790
4791 for (j = 0; ; j += VF*stride)
4792 tmp1 = array[j];
4793 tmp2 = array[j + stride];
4794 ...
4795 vectemp = {tmp1, tmp2, ...}
4796 */
4797
4798 ivstep = stride_step;
4799 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4800 build_int_cst (TREE_TYPE (ivstep), vf));
4801
4802 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4803
4804 create_iv (stride_base, ivstep, NULL,
4805 loop, &incr_gsi, insert_after,
4806 &offvar, NULL);
4807 incr = gsi_stmt (incr_gsi);
4808 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4809
4810 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4811 if (stmts)
4812 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4813
4814 prev_stmt_info = NULL;
4815 running_off = offvar;
14ac6aa2 4816 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
7d75abc8
MM
4817 for (j = 0; j < ncopies; j++)
4818 {
4819 tree vec_inv;
4820
9771b263 4821 vec_alloc (v, nunits);
7d75abc8
MM
4822 for (i = 0; i < nunits; i++)
4823 {
4824 tree newref, newoff;
4825 gimple incr;
14ac6aa2
RB
4826 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4827 running_off, alias_off);
7d75abc8
MM
4828
4829 newref = force_gimple_operand_gsi (gsi, newref, true,
4830 NULL_TREE, true,
4831 GSI_SAME_STMT);
4832 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 4833 newoff = copy_ssa_name (running_off, NULL);
14ac6aa2
RB
4834 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4835 running_off, stride_step);
7d75abc8
MM
4836 vect_finish_stmt_generation (stmt, incr, gsi);
4837
4838 running_off = newoff;
4839 }
4840
4841 vec_inv = build_constructor (vectype, v);
4842 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4843 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7d75abc8
MM
4844
4845 if (j == 0)
4846 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4847 else
4848 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4849 prev_stmt_info = vinfo_for_stmt (new_stmt);
4850 }
4851 return true;
4852 }
aec7ae7d 4853
0d0293ac 4854 if (grouped_load)
ebfd146a 4855 {
e14c1050 4856 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4857 if (slp
01d8bf07 4858 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
4859 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4860 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4861
ebfd146a 4862 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
4863 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4864 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4865 ??? But we can only do so if there is exactly one
4866 as we have no way to get at the rest. Leave the CSE
4867 opportunity alone.
4868 ??? With the group load eventually participating
4869 in multiple different permutations (having multiple
4870 slp nodes which refer to the same group) the CSE
4871 is even wrong code. See PR56270. */
4872 && !slp)
ebfd146a
IR
4873 {
4874 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4875 return true;
4876 }
4877 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4878 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a
IR
4879
4880 /* VEC_NUM is the number of vect stmts to be created for this group. */
4881 if (slp)
4882 {
0d0293ac 4883 grouped_load = false;
ebfd146a 4884 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
01d8bf07 4885 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
a70d6342 4886 slp_perm = true;
a64b9c26 4887 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
a70d6342 4888 }
ebfd146a 4889 else
a64b9c26
RB
4890 {
4891 vec_num = group_size;
4892 group_gap = 0;
4893 }
ebfd146a
IR
4894 }
4895 else
4896 {
4897 first_stmt = stmt;
4898 first_dr = dr;
4899 group_size = vec_num = 1;
a64b9c26 4900 group_gap = 0;
ebfd146a
IR
4901 }
4902
720f5239 4903 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4904 gcc_assert (alignment_support_scheme);
272c6793
RS
4905 /* Targets with load-lane instructions must not require explicit
4906 realignment. */
4907 gcc_assert (!load_lanes_p
4908 || alignment_support_scheme == dr_aligned
4909 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
4910
4911 /* In case the vectorization factor (VF) is bigger than the number
4912 of elements that we can fit in a vectype (nunits), we have to generate
4913 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 4914 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4915 from one copy of the vector stmt to the next, in the field
ff802fa1 4916 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4917 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
4918 stmts that use the defs of the current stmt. The example below
4919 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4920 need to create 4 vectorized stmts):
ebfd146a
IR
4921
4922 before vectorization:
4923 RELATED_STMT VEC_STMT
4924 S1: x = memref - -
4925 S2: z = x + 1 - -
4926
4927 step 1: vectorize stmt S1:
4928 We first create the vector stmt VS1_0, and, as usual, record a
4929 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4930 Next, we create the vector stmt VS1_1, and record a pointer to
4931 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4932 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
4933 stmts and pointers:
4934 RELATED_STMT VEC_STMT
4935 VS1_0: vx0 = memref0 VS1_1 -
4936 VS1_1: vx1 = memref1 VS1_2 -
4937 VS1_2: vx2 = memref2 VS1_3 -
4938 VS1_3: vx3 = memref3 - -
4939 S1: x = load - VS1_0
4940 S2: z = x + 1 - -
4941
b8698a0f
L
4942 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4943 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
4944 stmt S2. */
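 /* A minimal sketch (using the accessors named above) of how a later
 stage visits every vector copy of scalar stmt S1:

 gimple copy = STMT_VINFO_VEC_STMT (vinfo_for_stmt (S1));
 while (copy) // visits VS1_0, VS1_1, VS1_2, VS1_3 in turn
 {
 ... use this copy ...
 copy = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (copy));
 } */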
4945
0d0293ac 4946 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
4947
4948 S1: x2 = &base + 2
4949 S2: x0 = &base
4950 S3: x1 = &base + 1
4951 S4: x3 = &base + 3
4952
b8698a0f 4953 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
4954 starting from the access of the first stmt of the chain:
4955
4956 VS1: vx0 = &base
4957 VS2: vx1 = &base + vec_size*1
4958 VS3: vx3 = &base + vec_size*2
4959 VS4: vx4 = &base + vec_size*3
4960
4961 Then permutation statements are generated:
4962
e2c83630
RH
4963 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4964 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
4965 ...
4966
4967 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4968 (the order of the data-refs in the output of vect_permute_load_chain
4969 corresponds to the order of scalar stmts in the interleaving chain - see
4970 the documentation of vect_permute_load_chain()).
4971 The generation of permutation stmts and recording them in
0d0293ac 4972 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4973
b8698a0f 4974 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
4975 permutation stmts above are created for every copy. The result vector
4976 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4977 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
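 /* A concrete instance of the permutations above (illustrative;
 assumes four-element vectors and group_size == 2): with memory
 order a0 b0 a1 b1 a2 b2 a3 b3,

 vx0 = {a0, b0, a1, b1} vx1 = {a2, b2, a3, b3}
 vx5 = VEC_PERM_EXPR <vx0, vx1, {0, 2, 4, 6}> ==> {a0, a1, a2, a3}
 vx6 = VEC_PERM_EXPR <vx0, vx1, {1, 3, 5, 7}> ==> {b0, b1, b2, b3} */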
ebfd146a
IR
4978
4979 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4980 on a target that supports unaligned accesses (dr_unaligned_supported)
4981 we generate the following code:
4982 p = initial_addr;
4983 indx = 0;
4984 loop {
4985 p = p + indx * vectype_size;
4986 vec_dest = *(p);
4987 indx = indx + 1;
4988 }
4989
4990 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4991 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
4992 then generate the following code, in which the data in each iteration is
4993 obtained by two vector loads, one from the previous iteration, and one
4994 from the current iteration:
4995 p1 = initial_addr;
4996 msq_init = *(floor(p1))
4997 p2 = initial_addr + VS - 1;
4998 realignment_token = call target_builtin;
4999 indx = 0;
5000 loop {
5001 p2 = p2 + indx * vectype_size
5002 lsq = *(floor(p2))
5003 vec_dest = realign_load (msq, lsq, realignment_token)
5004 indx = indx + 1;
5005 msq = lsq;
5006 } */
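 /* In the pseudocode above, floor(p) denotes P rounded down to a
 vector-aligned address; the dr_explicit_realign* cases below realize
 it by masking off the low address bits:

 floor(p) == p & -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype) */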
5007
5008 /* If the misalignment remains the same throughout the execution of the
5009 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 5010 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
5011 This can only occur when vectorizing memory accesses in the inner-loop
5012 nested within an outer-loop that is being vectorized. */
5013
d1e4b493 5014 if (nested_in_vect_loop
211bea38 5015 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
5016 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5017 {
5018 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5019 compute_in_loop = true;
5020 }
5021
5022 if ((alignment_support_scheme == dr_explicit_realign_optimized
5023 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 5024 && !compute_in_loop)
ebfd146a
IR
5025 {
5026 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5027 alignment_support_scheme, NULL_TREE,
5028 &at_loop);
5029 if (alignment_support_scheme == dr_explicit_realign_optimized)
5030 {
5031 phi = SSA_NAME_DEF_STMT (msq);
5032 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5033 }
5034 }
5035 else
5036 at_loop = loop;
5037
a1e53f3f
L
5038 if (negative)
5039 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5040
272c6793
RS
5041 if (load_lanes_p)
5042 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5043 else
5044 aggr_type = vectype;
5045
ebfd146a
IR
5046 prev_stmt_info = NULL;
5047 for (j = 0; j < ncopies; j++)
b8698a0f 5048 {
272c6793 5049 /* 1. Create the vector or array pointer update chain. */
ebfd146a 5050 if (j == 0)
74bf76ed
JJ
5051 {
5052 bool simd_lane_access_p
5053 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5054 if (simd_lane_access_p
5055 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5056 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5057 && integer_zerop (DR_OFFSET (first_dr))
5058 && integer_zerop (DR_INIT (first_dr))
5059 && alias_sets_conflict_p (get_alias_set (aggr_type),
5060 get_alias_set (DR_REF (first_dr)))
5061 && (alignment_support_scheme == dr_aligned
5062 || alignment_support_scheme == dr_unaligned_supported))
5063 {
5064 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5065 dataref_offset = build_int_cst (reference_alias_ptr_type
5066 (DR_REF (first_dr)), 0);
8928eff3 5067 inv_p = false;
74bf76ed
JJ
5068 }
5069 else
5070 dataref_ptr
5071 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5072 offset, &dummy, gsi, &ptr_incr,
5073 simd_lane_access_p, &inv_p);
5074 }
5075 else if (dataref_offset)
5076 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5077 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5078 else
272c6793
RS
5079 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5080 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 5081
0d0293ac 5082 if (grouped_load || slp_perm)
9771b263 5083 dr_chain.create (vec_num);
5ce1ee7f 5084
272c6793 5085 if (load_lanes_p)
ebfd146a 5086 {
272c6793
RS
5087 tree vec_array;
5088
5089 vec_array = create_vector_array (vectype, vec_num);
5090
5091 /* Emit:
5092 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5093 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5094 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5095 gimple_call_set_lhs (new_stmt, vec_array);
5096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 5097
272c6793
RS
5098 /* Extract each vector into an SSA_NAME. */
5099 for (i = 0; i < vec_num; i++)
ebfd146a 5100 {
272c6793
RS
5101 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5102 vec_array, i);
9771b263 5103 dr_chain.quick_push (new_temp);
272c6793
RS
5104 }
5105
5106 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 5107 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
5108 }
5109 else
5110 {
5111 for (i = 0; i < vec_num; i++)
5112 {
5113 if (i > 0)
5114 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5115 stmt, NULL_TREE);
5116
5117 /* 2. Create the vector-load in the loop. */
5118 switch (alignment_support_scheme)
5119 {
5120 case dr_aligned:
5121 case dr_unaligned_supported:
be1ac4ec 5122 {
644ffefd
MJ
5123 unsigned int align, misalign;
5124
272c6793
RS
5125 data_ref
5126 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
5127 dataref_offset
5128 ? dataref_offset
5129 : build_int_cst (reference_alias_ptr_type
5130 (DR_REF (first_dr)), 0));
644ffefd 5131 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
5132 if (alignment_support_scheme == dr_aligned)
5133 {
5134 gcc_assert (aligned_access_p (first_dr));
644ffefd 5135 misalign = 0;
272c6793
RS
5136 }
5137 else if (DR_MISALIGNMENT (first_dr) == -1)
5138 {
5139 TREE_TYPE (data_ref)
5140 = build_aligned_type (TREE_TYPE (data_ref),
5141 TYPE_ALIGN (elem_type));
644ffefd
MJ
5142 align = TYPE_ALIGN_UNIT (elem_type);
5143 misalign = 0;
272c6793
RS
5144 }
5145 else
5146 {
5147 TREE_TYPE (data_ref)
5148 = build_aligned_type (TREE_TYPE (data_ref),
5149 TYPE_ALIGN (elem_type));
644ffefd 5150 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5151 }
74bf76ed
JJ
5152 if (dataref_offset == NULL_TREE)
5153 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5154 align, misalign);
272c6793 5155 break;
be1ac4ec 5156 }
272c6793 5157 case dr_explicit_realign:
267d3070 5158 {
272c6793
RS
5159 tree ptr, bump;
5160 tree vs_minus_1;
5161
5162 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5163
5164 if (compute_in_loop)
5165 msq = vect_setup_realignment (first_stmt, gsi,
5166 &realignment_token,
5167 dr_explicit_realign,
5168 dataref_ptr, NULL);
5169
070ecdfd 5170 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5171 new_stmt = gimple_build_assign_with_ops
070ecdfd 5172 (BIT_AND_EXPR, ptr, dataref_ptr,
272c6793
RS
5173 build_int_cst
5174 (TREE_TYPE (dataref_ptr),
5175 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5177 data_ref
5178 = build2 (MEM_REF, vectype, ptr,
5179 build_int_cst (reference_alias_ptr_type
5180 (DR_REF (first_dr)), 0));
5181 vec_dest = vect_create_destination_var (scalar_dest,
5182 vectype);
5183 new_stmt = gimple_build_assign (vec_dest, data_ref);
5184 new_temp = make_ssa_name (vec_dest, new_stmt);
5185 gimple_assign_set_lhs (new_stmt, new_temp);
5186 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5187 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5189 msq = new_temp;
5190
5191 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5192 TYPE_SIZE_UNIT (elem_type));
272c6793
RS
5193 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5194 new_stmt = gimple_build_assign_with_ops
5195 (BIT_AND_EXPR, NULL_TREE, ptr,
5196 build_int_cst
5197 (TREE_TYPE (ptr),
5198 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5199 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
5200 gimple_assign_set_lhs (new_stmt, ptr);
5201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5202 data_ref
5203 = build2 (MEM_REF, vectype, ptr,
5204 build_int_cst (reference_alias_ptr_type
5205 (DR_REF (first_dr)), 0));
5206 break;
267d3070 5207 }
272c6793 5208 case dr_explicit_realign_optimized:
070ecdfd 5209 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5210 new_stmt = gimple_build_assign_with_ops
070ecdfd 5211 (BIT_AND_EXPR, new_temp, dataref_ptr,
272c6793
RS
5212 build_int_cst
5213 (TREE_TYPE (dataref_ptr),
5214 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
5215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5216 data_ref
5217 = build2 (MEM_REF, vectype, new_temp,
5218 build_int_cst (reference_alias_ptr_type
5219 (DR_REF (first_dr)), 0));
5220 break;
5221 default:
5222 gcc_unreachable ();
5223 }
ebfd146a 5224 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5225 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
5226 new_temp = make_ssa_name (vec_dest, new_stmt);
5227 gimple_assign_set_lhs (new_stmt, new_temp);
5228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5229
272c6793
RS
5230 /* 3. Handle explicit realignment if necessary/supported.
5231 Create in loop:
5232 vec_dest = realign_load (msq, lsq, realignment_token) */
5233 if (alignment_support_scheme == dr_explicit_realign_optimized
5234 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5235 {
272c6793
RS
5236 lsq = gimple_assign_lhs (new_stmt);
5237 if (!realignment_token)
5238 realignment_token = dataref_ptr;
5239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5240 new_stmt
73804b12
RG
5241 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5242 vec_dest, msq, lsq,
5243 realignment_token);
272c6793
RS
5244 new_temp = make_ssa_name (vec_dest, new_stmt);
5245 gimple_assign_set_lhs (new_stmt, new_temp);
5246 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5247
5248 if (alignment_support_scheme == dr_explicit_realign_optimized)
5249 {
5250 gcc_assert (phi);
5251 if (i == vec_num - 1 && j == ncopies - 1)
5252 add_phi_arg (phi, lsq,
5253 loop_latch_edge (containing_loop),
9e227d60 5254 UNKNOWN_LOCATION);
272c6793
RS
5255 msq = lsq;
5256 }
ebfd146a 5257 }
ebfd146a 5258
59fd17e3
RB
5259 /* 4. Handle invariant-load. */
5260 if (inv_p && !bb_vinfo)
5261 {
5262 gimple_stmt_iterator gsi2 = *gsi;
5263 gcc_assert (!grouped_load);
5264 gsi_next (&gsi2);
5265 new_temp = vect_init_vector (stmt, scalar_dest,
5266 vectype, &gsi2);
5267 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5268 }
5269
272c6793
RS
5270 if (negative)
5271 {
aec7ae7d
JJ
5272 tree perm_mask = perm_mask_for_reverse (vectype);
5273 new_temp = permute_vec_elements (new_temp, new_temp,
5274 perm_mask, stmt, gsi);
ebfd146a
IR
5275 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5276 }
267d3070 5277
272c6793 5278 /* Collect vector loads and later create their permutation in
0d0293ac
MM
5279 vect_transform_grouped_load (). */
5280 if (grouped_load || slp_perm)
9771b263 5281 dr_chain.quick_push (new_temp);
267d3070 5282
272c6793
RS
5283 /* Store vector loads in the corresponding SLP_NODE. */
5284 if (slp && !slp_perm)
9771b263 5285 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5286 }
a64b9c26
RB
5287 /* Bump the vector pointer to account for a gap. */
5288 if (slp && group_gap != 0)
5289 {
5290 tree bump = size_binop (MULT_EXPR,
5291 TYPE_SIZE_UNIT (elem_type),
5292 size_int (group_gap));
5293 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5294 stmt, bump);
5295 }
ebfd146a
IR
5296 }
5297
5298 if (slp && !slp_perm)
5299 continue;
5300
5301 if (slp_perm)
5302 {
01d8bf07 5303 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
ebfd146a
IR
5304 slp_node_instance, false))
5305 {
9771b263 5306 dr_chain.release ();
ebfd146a
IR
5307 return false;
5308 }
5309 }
5310 else
5311 {
0d0293ac 5312 if (grouped_load)
ebfd146a 5313 {
272c6793 5314 if (!load_lanes_p)
0d0293ac 5315 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5316 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5317 }
5318 else
5319 {
5320 if (j == 0)
5321 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5322 else
5323 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5324 prev_stmt_info = vinfo_for_stmt (new_stmt);
5325 }
5326 }
9771b263 5327 dr_chain.release ();
ebfd146a
IR
5328 }
5329
ebfd146a
IR
5330 return true;
5331}
5332
5333/* Function vect_is_simple_cond.
b8698a0f 5334
ebfd146a
IR
5335 Input:
 5336 LOOP_VINFO - the vect info of the loop that is being vectorized.
 5337 COND - the condition that is checked for simple use.
5338
e9e1d143
RG
5339 Output:
5340 *COMP_VECTYPE - the vector type for the comparison.
5341
ebfd146a
IR
5342 Returns whether a COND can be vectorized. Checks whether
 5343 condition operands are supportable using vect_is_simple_use. */
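 /* For illustration (hypothetical SSA names): a COND such as a_1 < b_2,
 with both operands SSA_NAMEs that have vectorizable defs, is accepted
 and *COMP_VECTYPE is taken from their vector types; a_1 <= 5 is also
 fine, since integer/real/fixed constants are allowed on either side.
 Any other operand kind (e.g. an ADDR_EXPR) is rejected. */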
5344
87aab9b2 5345static bool
24ee1384
IR
5346vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5347 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
5348{
5349 tree lhs, rhs;
5350 tree def;
5351 enum vect_def_type dt;
e9e1d143 5352 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
5353
5354 if (!COMPARISON_CLASS_P (cond))
5355 return false;
5356
5357 lhs = TREE_OPERAND (cond, 0);
5358 rhs = TREE_OPERAND (cond, 1);
5359
5360 if (TREE_CODE (lhs) == SSA_NAME)
5361 {
5362 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
5363 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5364 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
5365 return false;
5366 }
5367 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5368 && TREE_CODE (lhs) != FIXED_CST)
5369 return false;
5370
5371 if (TREE_CODE (rhs) == SSA_NAME)
5372 {
5373 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
5374 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5375 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
5376 return false;
5377 }
f7e531cf 5378 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
5379 && TREE_CODE (rhs) != FIXED_CST)
5380 return false;
5381
e9e1d143 5382 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
5383 return true;
5384}
5385
5386/* vectorizable_condition.
5387
b8698a0f
L
 5388 Check if STMT is a conditional modify expression that can be vectorized.
5389 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5390 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
5391 at GSI.
5392
 5393 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 5394 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 5395 the else clause if it is 2).
ebfd146a
IR
5396
5397 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
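 /* Illustrative transform (hypothetical names): for the scalar stmt

 S: d_1 = a_2 < b_3 ? x_4 : y_5;

 the code below emits, per copy,

 vcmp_6 = va < vb; // in VEC_CMP_TYPE
 vd_7 = VEC_COND_EXPR <vcmp_6, vx, vy>; // recorded in VEC_STMT */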
5398
4bbe8262 5399bool
ebfd146a 5400vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
5401 gimple *vec_stmt, tree reduc_def, int reduc_index,
5402 slp_tree slp_node)
ebfd146a
IR
5403{
5404 tree scalar_dest = NULL_TREE;
5405 tree vec_dest = NULL_TREE;
ebfd146a
IR
5406 tree cond_expr, then_clause, else_clause;
5407 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5408 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5409 tree comp_vectype = NULL_TREE;
ff802fa1
IR
5410 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5411 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
5412 tree vec_compare, vec_cond_expr;
5413 tree new_temp;
5414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5415 tree def;
a855b1b1 5416 enum vect_def_type dt, dts[4];
ebfd146a 5417 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5418 int ncopies;
ebfd146a 5419 enum tree_code code;
a855b1b1 5420 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
5421 int i, j;
5422 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
5423 vec<tree> vec_oprnds0 = vNULL;
5424 vec<tree> vec_oprnds1 = vNULL;
5425 vec<tree> vec_oprnds2 = vNULL;
5426 vec<tree> vec_oprnds3 = vNULL;
74946978 5427 tree vec_cmp_type;
b8698a0f 5428
f7e531cf
IR
5429 if (slp_node || PURE_SLP_STMT (stmt_info))
5430 ncopies = 1;
5431 else
5432 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5433
ebfd146a 5434 gcc_assert (ncopies >= 1);
a855b1b1 5435 if (reduc_index && ncopies > 1)
ebfd146a
IR
5436 return false; /* FORNOW */
5437
f7e531cf
IR
5438 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5439 return false;
5440
5441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5442 return false;
5443
4bbe8262
IR
5444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5445 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5446 && reduc_def))
ebfd146a
IR
5447 return false;
5448
ebfd146a 5449 /* FORNOW: not yet supported. */
b8698a0f 5450 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5451 {
73fbfcad 5452 if (dump_enabled_p ())
78c60e3d 5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5454 "value used after loop.\n");
ebfd146a
IR
5455 return false;
5456 }
5457
5458 /* Is vectorizable conditional operation? */
5459 if (!is_gimple_assign (stmt))
5460 return false;
5461
5462 code = gimple_assign_rhs_code (stmt);
5463
5464 if (code != COND_EXPR)
5465 return false;
5466
4e71066d
RG
5467 cond_expr = gimple_assign_rhs1 (stmt);
5468 then_clause = gimple_assign_rhs2 (stmt);
5469 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5470
24ee1384
IR
5471 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5472 &comp_vectype)
e9e1d143 5473 || !comp_vectype)
ebfd146a
IR
5474 return false;
5475
5476 if (TREE_CODE (then_clause) == SSA_NAME)
5477 {
5478 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5479 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5480 &then_def_stmt, &def, &dt))
5481 return false;
5482 }
b8698a0f 5483 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
5484 && TREE_CODE (then_clause) != REAL_CST
5485 && TREE_CODE (then_clause) != FIXED_CST)
5486 return false;
5487
5488 if (TREE_CODE (else_clause) == SSA_NAME)
5489 {
5490 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5491 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
5492 &else_def_stmt, &def, &dt))
5493 return false;
5494 }
b8698a0f 5495 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
5496 && TREE_CODE (else_clause) != REAL_CST
5497 && TREE_CODE (else_clause) != FIXED_CST)
5498 return false;
5499
74946978
MP
5500 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
 5501 /* The result of a vector comparison should be a signed integer type. */
5502 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5503 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5504 if (vec_cmp_type == NULL_TREE)
5505 return false;
784fb9b3 5506
b8698a0f 5507 if (!vec_stmt)
ebfd146a
IR
5508 {
5509 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5510 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5511 }
5512
f7e531cf
IR
5513 /* Transform. */
5514
5515 if (!slp_node)
5516 {
9771b263
DN
5517 vec_oprnds0.create (1);
5518 vec_oprnds1.create (1);
5519 vec_oprnds2.create (1);
5520 vec_oprnds3.create (1);
f7e531cf 5521 }
ebfd146a
IR
5522
5523 /* Handle def. */
5524 scalar_dest = gimple_assign_lhs (stmt);
5525 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5526
5527 /* Handle cond expr. */
a855b1b1
MM
5528 for (j = 0; j < ncopies; j++)
5529 {
f7e531cf 5530 gimple new_stmt = NULL;
a855b1b1
MM
5531 if (j == 0)
5532 {
f7e531cf
IR
5533 if (slp_node)
5534 {
07687835
TS
5535 stack_vec<tree, 4> ops;
5536 stack_vec<vec<tree>, 4> vec_defs;
9771b263 5537
9771b263
DN
5538 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5539 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5540 ops.safe_push (then_clause);
5541 ops.safe_push (else_clause);
f7e531cf 5542 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
37b5ec8f
JJ
5543 vec_oprnds3 = vec_defs.pop ();
5544 vec_oprnds2 = vec_defs.pop ();
5545 vec_oprnds1 = vec_defs.pop ();
5546 vec_oprnds0 = vec_defs.pop ();
f7e531cf 5547
9771b263
DN
5548 ops.release ();
5549 vec_defs.release ();
f7e531cf
IR
5550 }
5551 else
5552 {
5553 gimple gtemp;
5554 vec_cond_lhs =
a855b1b1
MM
5555 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5556 stmt, NULL);
24ee1384
IR
5557 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5558 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
5559
5560 vec_cond_rhs =
5561 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5562 stmt, NULL);
24ee1384
IR
5563 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5564 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
5565 if (reduc_index == 1)
5566 vec_then_clause = reduc_def;
5567 else
5568 {
5569 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5570 stmt, NULL);
24ee1384 5571 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
5572 NULL, &gtemp, &def, &dts[2]);
5573 }
5574 if (reduc_index == 2)
5575 vec_else_clause = reduc_def;
5576 else
5577 {
5578 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5579 stmt, NULL);
24ee1384 5580 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5581 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5582 }
a855b1b1
MM
5583 }
5584 }
5585 else
5586 {
f7e531cf 5587 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5588 vec_oprnds0.pop ());
f7e531cf 5589 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5590 vec_oprnds1.pop ());
a855b1b1 5591 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5592 vec_oprnds2.pop ());
a855b1b1 5593 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5594 vec_oprnds3.pop ());
f7e531cf
IR
5595 }
5596
5597 if (!slp_node)
5598 {
9771b263
DN
5599 vec_oprnds0.quick_push (vec_cond_lhs);
5600 vec_oprnds1.quick_push (vec_cond_rhs);
5601 vec_oprnds2.quick_push (vec_then_clause);
5602 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
5603 }
5604
9dc3f7de 5605 /* Arguments are ready. Create the new vector stmt. */
9771b263 5606 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5607 {
9771b263
DN
5608 vec_cond_rhs = vec_oprnds1[i];
5609 vec_then_clause = vec_oprnds2[i];
5610 vec_else_clause = vec_oprnds3[i];
a855b1b1 5611
784fb9b3
JJ
5612 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5613 vec_cond_lhs, vec_cond_rhs);
f7e531cf
IR
5614 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5615 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5616
f7e531cf
IR
5617 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5618 new_temp = make_ssa_name (vec_dest, new_stmt);
5619 gimple_assign_set_lhs (new_stmt, new_temp);
5620 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5621 if (slp_node)
9771b263 5622 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
5623 }
5624
5625 if (slp_node)
5626 continue;
5627
5628 if (j == 0)
5629 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5630 else
5631 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5632
5633 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5634 }
b8698a0f 5635
9771b263
DN
5636 vec_oprnds0.release ();
5637 vec_oprnds1.release ();
5638 vec_oprnds2.release ();
5639 vec_oprnds3.release ();
f7e531cf 5640
ebfd146a
IR
5641 return true;
5642}
5643
5644
8644a673 5645/* Make sure the statement is vectorizable. */
ebfd146a
IR
5646
5647bool
a70d6342 5648vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5649{
8644a673 5650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5652 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5653 bool ok;
a70d6342 5654 tree scalar_type, vectype;
363477c0
JJ
5655 gimple pattern_stmt;
5656 gimple_seq pattern_def_seq;
ebfd146a 5657
73fbfcad 5658 if (dump_enabled_p ())
ebfd146a 5659 {
78c60e3d
SS
5660 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5662 dump_printf (MSG_NOTE, "\n");
8644a673 5663 }
ebfd146a 5664
1825a1f3 5665 if (gimple_has_volatile_ops (stmt))
b8698a0f 5666 {
73fbfcad 5667 if (dump_enabled_p ())
78c60e3d 5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5669 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
5670
5671 return false;
5672 }
b8698a0f
L
5673
5674 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
5675 to include:
5676 - the COND_EXPR which is the loop exit condition
5677 - any LABEL_EXPRs in the loop
b8698a0f 5678 - computations that are used only for array indexing or loop control.
8644a673 5679 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5680 instance, therefore, all the statements are relevant.
ebfd146a 5681
d092494c 5682 Pattern statement needs to be analyzed instead of the original statement
83197f37 5683 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
 5684 statements. In basic blocks we are called from some SLP instance
 5685 traversal; there we don't analyze pattern stmts instead, since the
 5686 pattern stmts will already be part of the SLP instance. */
83197f37
IR
5687
5688 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5689 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5690 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5691 {
9d5e7640 5692 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5693 && pattern_stmt
9d5e7640
IR
5694 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5695 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5696 {
83197f37 5697 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
5698 stmt = pattern_stmt;
5699 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5700 if (dump_enabled_p ())
9d5e7640 5701 {
78c60e3d
SS
5702 dump_printf_loc (MSG_NOTE, vect_location,
5703 "==> examining pattern statement: ");
5704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5705 dump_printf (MSG_NOTE, "\n");
9d5e7640
IR
5706 }
5707 }
5708 else
5709 {
73fbfcad 5710 if (dump_enabled_p ())
e645e942 5711 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 5712
9d5e7640
IR
5713 return true;
5714 }
8644a673 5715 }
83197f37 5716 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5717 && node == NULL
83197f37
IR
5718 && pattern_stmt
5719 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5720 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5721 {
5722 /* Analyze PATTERN_STMT too. */
73fbfcad 5723 if (dump_enabled_p ())
83197f37 5724 {
78c60e3d
SS
5725 dump_printf_loc (MSG_NOTE, vect_location,
5726 "==> examining pattern statement: ");
5727 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 5728 dump_printf (MSG_NOTE, "\n");
83197f37
IR
5729 }
5730
5731 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5732 return false;
5733 }
ebfd146a 5734
1107f3ae 5735 if (is_pattern_stmt_p (stmt_info)
079c527f 5736 && node == NULL
363477c0 5737 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5738 {
363477c0 5739 gimple_stmt_iterator si;
1107f3ae 5740
363477c0
JJ
5741 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5742 {
5743 gimple pattern_def_stmt = gsi_stmt (si);
5744 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5745 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5746 {
5747 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5748 if (dump_enabled_p ())
363477c0 5749 {
78c60e3d
SS
5750 dump_printf_loc (MSG_NOTE, vect_location,
5751 "==> examining pattern def statement: ");
5752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
e645e942 5753 dump_printf (MSG_NOTE, "\n");
363477c0 5754 }
1107f3ae 5755
363477c0
JJ
5756 if (!vect_analyze_stmt (pattern_def_stmt,
5757 need_to_vectorize, node))
5758 return false;
5759 }
5760 }
5761 }
1107f3ae 5762
8644a673
IR
5763 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5764 {
5765 case vect_internal_def:
5766 break;
ebfd146a 5767
8644a673 5768 case vect_reduction_def:
7c5222ff 5769 case vect_nested_cycle:
a70d6342 5770 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5771 || relevance == vect_used_in_outer_by_reduction
a70d6342 5772 || relevance == vect_unused_in_scope));
8644a673
IR
5773 break;
5774
5775 case vect_induction_def:
5776 case vect_constant_def:
5777 case vect_external_def:
5778 case vect_unknown_def_type:
5779 default:
5780 gcc_unreachable ();
5781 }
ebfd146a 5782
a70d6342
IR
5783 if (bb_vinfo)
5784 {
5785 gcc_assert (PURE_SLP_STMT (stmt_info));
5786
b690cc0f 5787 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5788 if (dump_enabled_p ())
a70d6342 5789 {
78c60e3d
SS
5790 dump_printf_loc (MSG_NOTE, vect_location,
5791 "get vectype for scalar type: ");
5792 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 5793 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
5794 }
5795
5796 vectype = get_vectype_for_scalar_type (scalar_type);
5797 if (!vectype)
5798 {
73fbfcad 5799 if (dump_enabled_p ())
a70d6342 5800 {
78c60e3d
SS
5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5802 "not SLPed: unsupported data-type ");
5803 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5804 scalar_type);
e645e942 5805 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
a70d6342
IR
5806 }
5807 return false;
5808 }
5809
73fbfcad 5810 if (dump_enabled_p ())
a70d6342 5811 {
78c60e3d
SS
5812 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5813 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 5814 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
5815 }
5816
5817 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5818 }
5819
8644a673 5820 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5821 {
8644a673
IR
5822 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5823 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5824 *need_to_vectorize = true;
ebfd146a
IR
5825 }
5826
8644a673 5827 ok = true;
b8698a0f 5828 if (!bb_vinfo
a70d6342
IR
5829 && (STMT_VINFO_RELEVANT_P (stmt_info)
5830 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5831 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5832 || vectorizable_shift (stmt, NULL, NULL, NULL)
8644a673
IR
5833 || vectorizable_operation (stmt, NULL, NULL, NULL)
5834 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5835 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5836 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5837 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5838 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5839 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
a70d6342
IR
5840 else
5841 {
5842 if (bb_vinfo)
4a00c761
JJ
5843 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5844 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5845 || vectorizable_operation (stmt, NULL, NULL, node)
a70d6342
IR
5846 || vectorizable_assignment (stmt, NULL, NULL, node)
5847 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5848 || vectorizable_call (stmt, NULL, NULL, node)
f7e531cf
IR
5849 || vectorizable_store (stmt, NULL, NULL, node)
5850 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5851 }
8644a673
IR
5852
5853 if (!ok)
ebfd146a 5854 {
73fbfcad 5855 if (dump_enabled_p ())
8644a673 5856 {
78c60e3d
SS
5857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5858 "not vectorized: relevant stmt not ");
5859 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5861 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5862 }
b8698a0f 5863
ebfd146a
IR
5864 return false;
5865 }
5866
a70d6342
IR
5867 if (bb_vinfo)
5868 return true;
5869
8644a673
IR
 5870 /* Stmts that are (also) "live" (i.e., used outside of the loop)
5871 need extra handling, except for vectorizable reductions. */
5872 if (STMT_VINFO_LIVE_P (stmt_info)
5873 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5874 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5875
8644a673 5876 if (!ok)
ebfd146a 5877 {
73fbfcad 5878 if (dump_enabled_p ())
8644a673 5879 {
78c60e3d
SS
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "not vectorized: live stmt not ");
5882 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5883 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 5884 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 5885 }
b8698a0f 5886
8644a673 5887 return false;
ebfd146a
IR
5888 }
5889
ebfd146a
IR
5890 return true;
5891}
5892
5893
5894/* Function vect_transform_stmt.
5895
 5896 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5897
5898bool
5899vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5900 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
5901 slp_instance slp_node_instance)
5902{
5903 bool is_store = false;
5904 gimple vec_stmt = NULL;
5905 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5906 bool done;
ebfd146a
IR
5907
5908 switch (STMT_VINFO_TYPE (stmt_info))
5909 {
5910 case type_demotion_vec_info_type:
ebfd146a 5911 case type_promotion_vec_info_type:
ebfd146a
IR
5912 case type_conversion_vec_info_type:
5913 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5914 gcc_assert (done);
5915 break;
5916
5917 case induc_vec_info_type:
5918 gcc_assert (!slp_node);
5919 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5920 gcc_assert (done);
5921 break;
5922
9dc3f7de
IR
5923 case shift_vec_info_type:
5924 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5925 gcc_assert (done);
5926 break;
5927
ebfd146a
IR
5928 case op_vec_info_type:
5929 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5930 gcc_assert (done);
5931 break;
5932
5933 case assignment_vec_info_type:
5934 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5935 gcc_assert (done);
5936 break;
5937
5938 case load_vec_info_type:
b8698a0f 5939 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
5940 slp_node_instance);
5941 gcc_assert (done);
5942 break;
5943
5944 case store_vec_info_type:
5945 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5946 gcc_assert (done);
0d0293ac 5947 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
5948 {
5949 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5950 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 5951 one are skipped, and their vec_stmt_info shouldn't be freed
5952 meanwhile. */
0d0293ac 5953 *grouped_store = true;
ebfd146a
IR
5954 if (STMT_VINFO_VEC_STMT (stmt_info))
5955 is_store = true;
5956 }
5957 else
5958 is_store = true;
5959 break;
5960
5961 case condition_vec_info_type:
f7e531cf 5962 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
5963 gcc_assert (done);
5964 break;
5965
5966 case call_vec_info_type:
190c2236 5967 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5968 stmt = gsi_stmt (*gsi);
ebfd146a
IR
5969 break;
5970
5971 case reduc_vec_info_type:
b5aeb3bb 5972 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
5973 gcc_assert (done);
5974 break;
5975
5976 default:
5977 if (!STMT_VINFO_LIVE_P (stmt_info))
5978 {
73fbfcad 5979 if (dump_enabled_p ())
78c60e3d 5980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5981 "stmt not supported.\n");
ebfd146a
IR
5982 gcc_unreachable ();
5983 }
5984 }
5985
5986 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5987 is being vectorized, but outside the immediately enclosing loop. */
5988 if (vec_stmt
a70d6342
IR
5989 && STMT_VINFO_LOOP_VINFO (stmt_info)
5990 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5991 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
5992 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5993 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5994 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5995 vect_used_in_outer_by_reduction))
ebfd146a 5996 {
a70d6342
IR
5997 struct loop *innerloop = LOOP_VINFO_LOOP (
5998 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
5999 imm_use_iterator imm_iter;
6000 use_operand_p use_p;
6001 tree scalar_dest;
6002 gimple exit_phi;
6003
73fbfcad 6004 if (dump_enabled_p ())
78c60e3d 6005 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6006 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
6007
 6008 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6009 (to be used when vectorizing outer-loop stmts that use the DEF of
6010 STMT). */
6011 if (gimple_code (stmt) == GIMPLE_PHI)
6012 scalar_dest = PHI_RESULT (stmt);
6013 else
6014 scalar_dest = gimple_assign_lhs (stmt);
6015
6016 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6017 {
6018 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6019 {
6020 exit_phi = USE_STMT (use_p);
6021 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6022 }
6023 }
6024 }
6025
6026 /* Handle stmts whose DEF is used outside the loop-nest that is
6027 being vectorized. */
6028 if (STMT_VINFO_LIVE_P (stmt_info)
6029 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6030 {
6031 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6032 gcc_assert (done);
6033 }
6034
6035 if (vec_stmt)
83197f37 6036 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 6037
b8698a0f 6038 return is_store;
ebfd146a
IR
6039}
6040
6041
b8698a0f 6042/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
6043 stmt_vec_info. */
6044
6045void
6046vect_remove_stores (gimple first_stmt)
6047{
6048 gimple next = first_stmt;
6049 gimple tmp;
6050 gimple_stmt_iterator next_si;
6051
6052 while (next)
6053 {
78048b1c
JJ
6054 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6055
6056 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6057 if (is_pattern_stmt_p (stmt_info))
6058 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
6059 /* Free the attached stmt_vec_info and remove the stmt. */
6060 next_si = gsi_for_stmt (next);
3d3f2249 6061 unlink_stmt_vdef (next);
ebfd146a 6062 gsi_remove (&next_si, true);
3d3f2249 6063 release_defs (next);
ebfd146a
IR
6064 free_stmt_vec_info (next);
6065 next = tmp;
6066 }
6067}
6068
6069
6070/* Function new_stmt_vec_info.
6071
6072 Create and initialize a new stmt_vec_info struct for STMT. */
6073
6074stmt_vec_info
b8698a0f 6075new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6076 bb_vec_info bb_vinfo)
ebfd146a
IR
6077{
6078 stmt_vec_info res;
6079 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6080
6081 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6082 STMT_VINFO_STMT (res) = stmt;
6083 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 6084 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 6085 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
6086 STMT_VINFO_LIVE_P (res) = false;
6087 STMT_VINFO_VECTYPE (res) = NULL;
6088 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 6089 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
6090 STMT_VINFO_IN_PATTERN_P (res) = false;
6091 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 6092 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
6093 STMT_VINFO_DATA_REF (res) = NULL;
6094
6095 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6096 STMT_VINFO_DR_OFFSET (res) = NULL;
6097 STMT_VINFO_DR_INIT (res) = NULL;
6098 STMT_VINFO_DR_STEP (res) = NULL;
6099 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6100
6101 if (gimple_code (stmt) == GIMPLE_PHI
6102 && is_loop_header_bb_p (gimple_bb (stmt)))
6103 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6104 else
8644a673
IR
6105 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6106
9771b263 6107 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 6108 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
6109 GROUP_FIRST_ELEMENT (res) = NULL;
6110 GROUP_NEXT_ELEMENT (res) = NULL;
6111 GROUP_SIZE (res) = 0;
6112 GROUP_STORE_COUNT (res) = 0;
6113 GROUP_GAP (res) = 0;
6114 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
6115
6116 return res;
6117}
6118
6119
 6120 /* Create the vector holding the stmt_vec_info structs. */
6121
6122void
6123init_stmt_vec_info_vec (void)
6124{
9771b263
DN
6125 gcc_assert (!stmt_vec_info_vec.exists ());
6126 stmt_vec_info_vec.create (50);
ebfd146a
IR
6127}
6128
6129
 6130 /* Free the vector of stmt_vec_info structs. */
6131
6132void
6133free_stmt_vec_info_vec (void)
6134{
93675444
JJ
6135 unsigned int i;
6136 vec_void_p info;
6137 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6138 if (info != NULL)
6139 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
9771b263
DN
6140 gcc_assert (stmt_vec_info_vec.exists ());
6141 stmt_vec_info_vec.release ();
ebfd146a
IR
6142}
6143
6144
6145/* Free stmt vectorization related info. */
6146
6147void
6148free_stmt_vec_info (gimple stmt)
6149{
6150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6151
6152 if (!stmt_info)
6153 return;
6154
78048b1c
JJ
6155 /* Check if this statement has a related "pattern stmt"
6156 (introduced by the vectorizer during the pattern recognition
6157 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6158 too. */
6159 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6160 {
6161 stmt_vec_info patt_info
6162 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6163 if (patt_info)
6164 {
363477c0
JJ
6165 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6166 if (seq)
6167 {
6168 gimple_stmt_iterator si;
6169 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6170 free_stmt_vec_info (gsi_stmt (si));
6171 }
78048b1c
JJ
6172 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6173 }
6174 }
6175
9771b263 6176 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
ebfd146a
IR
6177 set_vinfo_for_stmt (stmt, NULL);
6178 free (stmt_info);
6179}
6180
6181
bb67d9c7 6182/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6183
bb67d9c7 6184 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
6185 by the target. */
6186
bb67d9c7
RG
6187static tree
6188get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
6189{
6190 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6191 enum machine_mode simd_mode;
2f816591 6192 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
6193 int nunits;
6194 tree vectype;
6195
cc4b5170 6196 if (nbytes == 0)
ebfd146a
IR
6197 return NULL_TREE;
6198
48f2e373
RB
6199 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6200 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6201 return NULL_TREE;
6202
7b7b1813
RG
6203 /* For vector types of elements whose mode precision doesn't
 6204 match their type's precision we use an element type of mode
6205 precision. The vectorization routines will have to make sure
48f2e373
RB
6206 they support the proper result truncation/extension.
6207 We also make sure to build vector types with INTEGER_TYPE
6208 component type only. */
6d7971b8 6209 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
6210 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6211 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
6212 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6213 TYPE_UNSIGNED (scalar_type));
6d7971b8 6214
ccbf5bb4
RG
6215 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6216 When the component mode passes the above test simply use a type
6217 corresponding to that mode. The theory is that any use that
6218 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 6219 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 6220 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
6221 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6222
6223 /* We can't build a vector type of elements with alignment bigger than
6224 their size. */
dfc2e2ac 6225 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
6226 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6227 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 6228
dfc2e2ac
RB
 6229 /* If we fell back to using the mode, fail if there was
6230 no scalar type for it. */
6231 if (scalar_type == NULL_TREE)
6232 return NULL_TREE;
6233
bb67d9c7
RG
6234 /* If no size was supplied use the mode the target prefers. Otherwise
6235 lookup a vector mode of the specified size. */
6236 if (size == 0)
6237 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6238 else
6239 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
6240 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6241 if (nunits <= 1)
6242 return NULL_TREE;
ebfd146a
IR
6243
6244 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
6245
6246 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6247 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 6248 return NULL_TREE;
ebfd146a
IR
6249
6250 return vectype;
6251}
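/* Worked example (target-dependent; assume the target prefers 16-byte
 vectors for SImode and int is 4 bytes, so nbytes == 4):

 size == 0 -> simd_mode = preferred_simd_mode, e.g. V4SImode
 size == 16 -> simd_mode = mode_for_vector (SImode, 16 / 4) = V4SImode

 either way nunits = GET_MODE_SIZE (simd_mode) / nbytes = 16 / 4 = 4,
 and the result is vector(4) int. */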
6252
bb67d9c7
RG
6253unsigned int current_vector_size;
6254
6255/* Function get_vectype_for_scalar_type.
6256
6257 Returns the vector type corresponding to SCALAR_TYPE as supported
6258 by the target. */
6259
6260tree
6261get_vectype_for_scalar_type (tree scalar_type)
6262{
6263 tree vectype;
6264 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6265 current_vector_size);
6266 if (vectype
6267 && current_vector_size == 0)
6268 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6269 return vectype;
6270}
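/* Sketch of the latching behavior (assuming the first request picks a
 16-byte vector): the first successful call sets current_vector_size,
 so subsequent vector types share that size, e.g.

 get_vectype_for_scalar_type (int) -> vector(4) int, latches 16
 get_vectype_for_scalar_type (float) -> vector(4) float, not wider */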
6271
b690cc0f
RG
6272/* Function get_same_sized_vectype
6273
6274 Returns a vector type corresponding to SCALAR_TYPE of size
6275 VECTOR_TYPE if supported by the target. */
6276
6277tree
bb67d9c7 6278get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6279{
bb67d9c7
RG
6280 return get_vectype_for_scalar_type_and_size
6281 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
6282}
6283
ebfd146a
IR
6284/* Function vect_is_simple_use.
6285
6286 Input:
a70d6342
IR
6287 LOOP_VINFO - the vect info of the loop that is being vectorized.
6288 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6289 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
6290 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6291
6292 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6293 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6294 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6295 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
6296 is the case in reduction/induction computations).
6297 For basic blocks, supportable operands are constants and bb invariants.
6298 For now, operands defined outside the basic block are not supported. */
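 /* Classification sketch (hypothetical names): for a_5 = b_7 + 4 in
 the loop being vectorized,

 operand 4 -> *DT = vect_constant_def
 operand b_7 -> *DT = vect_external_def if its def stmt is outside
 the loop, otherwise the def type recorded in the
 def stmt's stmt_info (e.g. vect_internal_def). */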
ebfd146a
IR
6299
6300bool
24ee1384 6301vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6302 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6303 tree *def, enum vect_def_type *dt)
b8698a0f 6304{
ebfd146a
IR
6305 basic_block bb;
6306 stmt_vec_info stmt_vinfo;
a70d6342 6307 struct loop *loop = NULL;
b8698a0f 6308
a70d6342
IR
6309 if (loop_vinfo)
6310 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
6311
6312 *def_stmt = NULL;
6313 *def = NULL_TREE;
b8698a0f 6314
73fbfcad 6315 if (dump_enabled_p ())
ebfd146a 6316 {
78c60e3d
SS
6317 dump_printf_loc (MSG_NOTE, vect_location,
6318 "vect_is_simple_use: operand ");
6319 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 6320 dump_printf (MSG_NOTE, "\n");
ebfd146a 6321 }
b8698a0f 6322
b758f602 6323 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
6324 {
6325 *dt = vect_constant_def;
6326 return true;
6327 }
b8698a0f 6328
ebfd146a
IR
6329 if (is_gimple_min_invariant (operand))
6330 {
6331 *def = operand;
8644a673 6332 *dt = vect_external_def;
ebfd146a
IR
6333 return true;
6334 }
6335
6336 if (TREE_CODE (operand) == PAREN_EXPR)
6337 {
73fbfcad 6338 if (dump_enabled_p ())
e645e942 6339 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
ebfd146a
IR
6340 operand = TREE_OPERAND (operand, 0);
6341 }
b8698a0f 6342
ebfd146a
IR
6343 if (TREE_CODE (operand) != SSA_NAME)
6344 {
73fbfcad 6345 if (dump_enabled_p ())
78c60e3d 6346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6347 "not ssa-name.\n");
ebfd146a
IR
6348 return false;
6349 }
b8698a0f 6350
ebfd146a
IR
6351 *def_stmt = SSA_NAME_DEF_STMT (operand);
6352 if (*def_stmt == NULL)
6353 {
73fbfcad 6354 if (dump_enabled_p ())
78c60e3d 6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6356 "no def_stmt.\n");
ebfd146a
IR
6357 return false;
6358 }
6359
73fbfcad 6360 if (dump_enabled_p ())
ebfd146a 6361 {
78c60e3d
SS
6362 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6363 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
e645e942 6364 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
6365 }
6366
8644a673 6367 /* Empty stmt is expected only in case of a function argument.
ebfd146a
IR
6368 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6369 if (gimple_nop_p (*def_stmt))
6370 {
6371 *def = operand;
8644a673 6372 *dt = vect_external_def;
ebfd146a
IR
6373 return true;
6374 }
6375
6376 bb = gimple_bb (*def_stmt);
a70d6342
IR
6377
6378 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6379 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6380 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6381 *dt = vect_external_def;
ebfd146a
IR
6382 else
6383 {
6384 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6385 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6386 }
6387
24ee1384
IR
6388 if (*dt == vect_unknown_def_type
6389 || (stmt
6390 && *dt == vect_double_reduction_def
6391 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6392 {
73fbfcad 6393 if (dump_enabled_p ())
78c60e3d 6394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6395 "Unsupported pattern.\n");
ebfd146a
IR
6396 return false;
6397 }
6398
73fbfcad 6399 if (dump_enabled_p ())
e645e942 6400 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
ebfd146a
IR
6401
6402 switch (gimple_code (*def_stmt))
6403 {
6404 case GIMPLE_PHI:
6405 *def = gimple_phi_result (*def_stmt);
6406 break;
6407
6408 case GIMPLE_ASSIGN:
6409 *def = gimple_assign_lhs (*def_stmt);
6410 break;
6411
6412 case GIMPLE_CALL:
6413 *def = gimple_call_lhs (*def_stmt);
6414 if (*def != NULL)
6415 break;
6416 /* FALLTHRU */
6417 default:
73fbfcad 6418 if (dump_enabled_p ())
78c60e3d 6419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6420 "unsupported defining stmt:\n");
ebfd146a
IR
6421 return false;
6422 }
6423
6424 return true;
6425}
6426
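/* A minimal usage sketch (hypothetical helper, not part of this file):
   how a caller would typically classify an operand with
   vect_is_simple_use during loop vectorization.  Assumes STMT is a
   GIMPLE_ASSIGN inside the loop described by LOOP_VINFO; the helper
   name is invented for illustration.  */

static bool
example_rhs1_is_simple (gimple stmt, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  tree op = gimple_assign_rhs1 (stmt);

  /* NULL bb_vinfo: loop (not basic-block) vectorization is assumed.  */
  if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  /* Constants and loop invariants need no vectorized def-stmt;
     vect_internal_def means the def was analyzed in this loop.  */
  return (dt == vect_constant_def
	  || dt == vect_external_def
	  || dt == vect_internal_def);
}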
b690cc0f
RG
6427/* Function vect_is_simple_use_1.
6428
6429   Same as vect_is_simple_use but also determines the vector operand
6430   type of OPERAND and stores it to *VECTYPE.  If the definition of
6431   OPERAND is vect_uninitialized_def, vect_constant_def or
6432   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6433   is responsible for computing the best suited vector type for the
6434   scalar operand.  */
6435
6436bool
24ee1384 6437vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
6438 bb_vec_info bb_vinfo, gimple *def_stmt,
6439 tree *def, enum vect_def_type *dt, tree *vectype)
6440{
24ee1384
IR
6441 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6442 def, dt))
b690cc0f
RG
6443 return false;
6444
6445 /* Now get a vector type if the def is internal, otherwise supply
6446 NULL_TREE and leave it up to the caller to figure out a proper
6447 type for the use stmt. */
6448 if (*dt == vect_internal_def
6449 || *dt == vect_induction_def
6450 || *dt == vect_reduction_def
6451 || *dt == vect_double_reduction_def
6452 || *dt == vect_nested_cycle)
6453 {
6454 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
6455
6456 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6457 && !STMT_VINFO_RELEVANT (stmt_info)
6458 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 6459 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 6460
b690cc0f
RG
6461 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6462 gcc_assert (*vectype != NULL_TREE);
6463 }
6464 else if (*dt == vect_uninitialized_def
6465 || *dt == vect_constant_def
6466 || *dt == vect_external_def)
6467 *vectype = NULL_TREE;
6468 else
6469 gcc_unreachable ();
6470
6471 return true;
6472}
6473
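/* A hedged sketch (hypothetical helper, not part of this file) of the
   typical vect_is_simple_use_1 pattern: fetch the vector type of an
   operand, falling back to a caller-chosen type when the def is
   constant or external and *VECTYPE comes back NULL_TREE.  */

static tree
example_operand_vectype (tree operand, gimple stmt,
			 loop_vec_info loop_vinfo, tree fallback_vectype)
{
  gimple def_stmt;
  tree def, vectype;
  enum vect_def_type dt;

  if (!vect_is_simple_use_1 (operand, stmt, loop_vinfo, NULL,
			     &def_stmt, &def, &dt, &vectype))
    return NULL_TREE;

  /* For constant/external defs the caller must pick a suitable type.  */
  return vectype ? vectype : fallback_vectype;
}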
ebfd146a
IR
6474
6475/* Function supportable_widening_operation
6476
b8698a0f
L
6477 Check whether an operation represented by the code CODE is a
6478 widening operation that is supported by the target platform in
b690cc0f
RG
6479 vector form (i.e., when operating on arguments of type VECTYPE_IN
6480 producing a result of type VECTYPE_OUT).
b8698a0f 6481
ebfd146a
IR
6482   Widening operations we currently support are NOP (CONVERT), FLOAT,
6483   WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
6484   are supported by the target platform either directly (via vector
6485   tree-codes), or via target builtins.
6486
6487 Output:
b8698a0f
L
6488 - CODE1 and CODE2 are codes of vector operations to be used when
6489 vectorizing the operation, if available.
ebfd146a
IR
6490 - MULTI_STEP_CVT determines the number of required intermediate steps in
6491 case of multi-step conversion (like char->short->int - in that case
6492 MULTI_STEP_CVT will be 1).
b8698a0f
L
6493   - INTERM_TYPES contains the intermediate types required to perform the
6494     widening operation (short in the above example).  */
ebfd146a
IR
6495
6496bool
b690cc0f
RG
6497supportable_widening_operation (enum tree_code code, gimple stmt,
6498 tree vectype_out, tree vectype_in,
ebfd146a
IR
6499 enum tree_code *code1, enum tree_code *code2,
6500 int *multi_step_cvt,
9771b263 6501 vec<tree> *interm_types)
ebfd146a
IR
6502{
6503 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6504 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 6505 struct loop *vect_loop = NULL;
ebfd146a 6506 enum machine_mode vec_mode;
81f40b79 6507 enum insn_code icode1, icode2;
ebfd146a 6508 optab optab1, optab2;
b690cc0f
RG
6509 tree vectype = vectype_in;
6510 tree wide_vectype = vectype_out;
ebfd146a 6511 enum tree_code c1, c2;
4a00c761
JJ
6512 int i;
6513 tree prev_type, intermediate_type;
6514 enum machine_mode intermediate_mode, prev_mode;
6515 optab optab3, optab4;
ebfd146a 6516
4a00c761 6517 *multi_step_cvt = 0;
4ef69dfc
IR
6518 if (loop_info)
6519 vect_loop = LOOP_VINFO_LOOP (loop_info);
6520
ebfd146a
IR
6521 switch (code)
6522 {
6523 case WIDEN_MULT_EXPR:
6ae6116f
RH
6524 /* The result of a vectorized widening operation usually requires
6525 two vectors (because the widened results do not fit into one vector).
6526         The vector results would normally be expected to be generated
6527         in the same order as in the original scalar computation,
6528 i.e. if 8 results are generated in each vector iteration, they are
6529 to be organized as follows:
6530 vect1: [res1,res2,res3,res4],
6531 vect2: [res5,res6,res7,res8].
6532
6533 However, in the special case that the result of the widening
6534 operation is used in a reduction computation only, the order doesn't
6535 matter (because when vectorizing a reduction we change the order of
6536 the computation). Some targets can take advantage of this and
6537 generate more efficient code. For example, targets like Altivec,
6538 that support widen_mult using a sequence of {mult_even,mult_odd}
6539 generate the following vectors:
6540 vect1: [res1,res3,res5,res7],
6541 vect2: [res2,res4,res6,res8].
6542
6543 When vectorizing outer-loops, we execute the inner-loop sequentially
6544 (each vectorized inner-loop iteration contributes to VF outer-loop
6545         iterations in parallel).  We therefore don't allow changing the
6546         order of the computation in the inner-loop during outer-loop
6547 vectorization. */
6548 /* TODO: Another case in which order doesn't *really* matter is when we
6549 widen and then contract again, e.g. (short)((int)x * y >> 8).
6550 Normally, pack_trunc performs an even/odd permute, whereas the
6551 repack from an even/odd expansion would be an interleave, which
6552 would be significantly simpler for e.g. AVX2. */
6553 /* In any case, in order to avoid duplicating the code below, recurse
6554 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6555 are properly set up for the caller. If we fail, we'll continue with
6556 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6557 if (vect_loop
6558 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6559 && !nested_in_vect_loop_p (vect_loop, stmt)
6560 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6561 stmt, vectype_out, vectype_in,
a86ec597
RH
6562 code1, code2, multi_step_cvt,
6563 interm_types))
6ae6116f 6564 return true;
4a00c761
JJ
6565 c1 = VEC_WIDEN_MULT_LO_EXPR;
6566 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
6567 break;
6568
6ae6116f
RH
6569 case VEC_WIDEN_MULT_EVEN_EXPR:
6570 /* Support the recursion induced just above. */
6571 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6572 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6573 break;
6574
36ba4aae 6575 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
6576 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6577 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
6578 break;
6579
ebfd146a 6580 CASE_CONVERT:
4a00c761
JJ
6581 c1 = VEC_UNPACK_LO_EXPR;
6582 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
6583 break;
6584
6585 case FLOAT_EXPR:
4a00c761
JJ
6586 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6587 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
6588 break;
6589
6590 case FIX_TRUNC_EXPR:
6591 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6592 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6593 computing the operation. */
6594 return false;
6595
6596 default:
6597 gcc_unreachable ();
6598 }
6599
6ae6116f 6600 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
4a00c761
JJ
6601 {
6602 enum tree_code ctmp = c1;
6603 c1 = c2;
6604 c2 = ctmp;
6605 }
6606
ebfd146a
IR
6607 if (code == FIX_TRUNC_EXPR)
6608 {
6609 /* The signedness is determined from output operand. */
b690cc0f
RG
6610 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6611 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
6612 }
6613 else
6614 {
6615 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6616 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6617 }
6618
6619 if (!optab1 || !optab2)
6620 return false;
6621
6622 vec_mode = TYPE_MODE (vectype);
947131ba
RS
6623 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6624 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6625 return false;
6626
4a00c761
JJ
6627 *code1 = c1;
6628 *code2 = c2;
6629
6630 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6631 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6632 return true;
6633
b8698a0f 6634 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 6635 types. */
ebfd146a 6636
4a00c761
JJ
6637 prev_type = vectype;
6638 prev_mode = vec_mode;
b8698a0f 6639
4a00c761
JJ
6640 if (!CONVERT_EXPR_CODE_P (code))
6641 return false;
b8698a0f 6642
4a00c761
JJ
6643  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6644     intermediate steps in the promotion sequence.  We try
6645     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6646     not.  */
9771b263 6647 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
6648 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6649 {
6650 intermediate_mode = insn_data[icode1].operand[0].mode;
6651 intermediate_type
6652 = lang_hooks.types.type_for_mode (intermediate_mode,
6653 TYPE_UNSIGNED (prev_type));
6654 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6655 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6656
6657 if (!optab3 || !optab4
6658 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6659 || insn_data[icode1].operand[0].mode != intermediate_mode
6660 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6661 || insn_data[icode2].operand[0].mode != intermediate_mode
6662 || ((icode1 = optab_handler (optab3, intermediate_mode))
6663 == CODE_FOR_nothing)
6664 || ((icode2 = optab_handler (optab4, intermediate_mode))
6665 == CODE_FOR_nothing))
6666 break;
ebfd146a 6667
9771b263 6668 interm_types->quick_push (intermediate_type);
4a00c761
JJ
6669 (*multi_step_cvt)++;
6670
6671 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6672 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6673 return true;
6674
6675 prev_type = intermediate_type;
6676 prev_mode = intermediate_mode;
ebfd146a
IR
6677 }
6678
9771b263 6679 interm_types->release ();
4a00c761 6680 return false;
ebfd146a
IR
6681}
6682
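/* An illustrative sketch (hypothetical helper, not part of this file)
   of querying supportable_widening_operation for a widening multiply.
   For a char->int widening on a typical target, one intermediate
   (short) vector type would be recorded and MULTI_STEP_CVT would be 1.  */

static bool
example_check_widen_mult (gimple stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok;

  ok = supportable_widening_operation (WIDEN_MULT_EXPR, stmt,
				       vectype_out, vectype_in,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types);
  /* On success CODE1/CODE2 hold e.g. VEC_WIDEN_MULT_LO/HI_EXPR (or the
     EVEN/ODD pair when the result feeds a reduction); the caller owns
     INTERM_TYPES and must release it.  */
  interm_types.release ();
  return ok;
}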
6683
6684/* Function supportable_narrowing_operation
6685
b8698a0f
L
6686 Check whether an operation represented by the code CODE is a
6687 narrowing operation that is supported by the target platform in
b690cc0f
RG
6688 vector form (i.e., when operating on arguments of type VECTYPE_IN
6689 and producing a result of type VECTYPE_OUT).
b8698a0f 6690
ebfd146a 6691 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 6692 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
6693 the target platform directly via vector tree-codes.
6694
6695 Output:
b8698a0f
L
6696 - CODE1 is the code of a vector operation to be used when
6697 vectorizing the operation, if available.
ebfd146a
IR
6698 - MULTI_STEP_CVT determines the number of required intermediate steps in
6699 case of multi-step conversion (like int->short->char - in that case
6700 MULTI_STEP_CVT will be 1).
6701   - INTERM_TYPES contains the intermediate types required to perform the
b8698a0f 6702 narrowing operation (short in the above example). */
ebfd146a
IR
6703
6704bool
6705supportable_narrowing_operation (enum tree_code code,
b690cc0f 6706 tree vectype_out, tree vectype_in,
ebfd146a 6707 enum tree_code *code1, int *multi_step_cvt,
9771b263 6708 vec<tree> *interm_types)
ebfd146a
IR
6709{
6710 enum machine_mode vec_mode;
6711 enum insn_code icode1;
6712 optab optab1, interm_optab;
b690cc0f
RG
6713 tree vectype = vectype_in;
6714 tree narrow_vectype = vectype_out;
ebfd146a 6715 enum tree_code c1;
4a00c761
JJ
6716 tree intermediate_type;
6717 enum machine_mode intermediate_mode, prev_mode;
ebfd146a 6718 int i;
4a00c761 6719 bool uns;
ebfd146a 6720
4a00c761 6721 *multi_step_cvt = 0;
ebfd146a
IR
6722 switch (code)
6723 {
6724 CASE_CONVERT:
6725 c1 = VEC_PACK_TRUNC_EXPR;
6726 break;
6727
6728 case FIX_TRUNC_EXPR:
6729 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6730 break;
6731
6732 case FLOAT_EXPR:
6733 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6734 tree code and optabs used for computing the operation. */
6735 return false;
6736
6737 default:
6738 gcc_unreachable ();
6739 }
6740
6741 if (code == FIX_TRUNC_EXPR)
6742 /* The signedness is determined from output operand. */
b690cc0f 6743 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
6744 else
6745 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6746
6747 if (!optab1)
6748 return false;
6749
6750 vec_mode = TYPE_MODE (vectype);
947131ba 6751 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
6752 return false;
6753
4a00c761
JJ
6754 *code1 = c1;
6755
6756 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6757 return true;
6758
ebfd146a
IR
6759 /* Check if it's a multi-step conversion that can be done using intermediate
6760 types. */
4a00c761
JJ
6761 prev_mode = vec_mode;
6762 if (code == FIX_TRUNC_EXPR)
6763 uns = TYPE_UNSIGNED (vectype_out);
6764 else
6765 uns = TYPE_UNSIGNED (vectype);
6766
6767 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6768 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6769 costly than signed. */
6770 if (code == FIX_TRUNC_EXPR && uns)
6771 {
6772 enum insn_code icode2;
6773
6774 intermediate_type
6775 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6776 interm_optab
6777 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 6778 if (interm_optab != unknown_optab
4a00c761
JJ
6779	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6780 && insn_data[icode1].operand[0].mode
6781 == insn_data[icode2].operand[0].mode)
6782 {
6783 uns = false;
6784 optab1 = interm_optab;
6785 icode1 = icode2;
6786 }
6787 }
ebfd146a 6788
4a00c761
JJ
6789 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6790     intermediate steps in the narrowing sequence.  We try
6791 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 6792 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
6793 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6794 {
6795 intermediate_mode = insn_data[icode1].operand[0].mode;
6796 intermediate_type
6797 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6798 interm_optab
6799 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6800 optab_default);
6801 if (!interm_optab
6802 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6803 || insn_data[icode1].operand[0].mode != intermediate_mode
6804 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6805 == CODE_FOR_nothing))
6806 break;
6807
9771b263 6808 interm_types->quick_push (intermediate_type);
4a00c761
JJ
6809 (*multi_step_cvt)++;
6810
6811 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6812 return true;
6813
6814 prev_mode = intermediate_mode;
6815 optab1 = interm_optab;
ebfd146a
IR
6816 }
6817
9771b263 6818 interm_types->release ();
4a00c761 6819 return false;
ebfd146a 6820}
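/* A matching sketch for the narrowing direction (hypothetical helper,
   not part of this file): check whether e.g. an int->char conversion is
   vectorizable.  For int->short->char the call would report
   MULTI_STEP_CVT == 1 with the short vector type as the recorded
   intermediate.  */

static bool
example_check_narrow_convert (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok;

  ok = supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code1, &multi_step_cvt,
					&interm_types);
  /* On success CODE1 is e.g. VEC_PACK_TRUNC_EXPR.  */
  interm_types.release ();
  return ok;
}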