/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}

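/* For example (illustrative only), a caller accounting for NCOPIES vector
   statements in the loop body during analysis might do

     unsigned cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				       stmt_info, 0, vect_body);

   which queues the entries in BODY_COST_VEC and returns
   builtin_vectorization_cost (vector_stmt, vectype, 0) * ncopies as the
   preliminary estimate; with a NULL cost vector the cost is instead fed
   straight to the target via add_stmt_cost.  */
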
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in which case the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This is a use outside the pattern; if LHS also has uses that
	     are pattern uses, we should mark the stmt itself, and not the
	     pattern stmt.  */
	  if (TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
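
/* For example, given the (hypothetical) store "a[i_1] = x_2", this returns
   true for the stored value x_2 but false for i_1, which is only used to
   index the array; a use of i_1 alone therefore does not make the stmt
   that defines i_1 relevant (see process_use below).  */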

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process worklist.  */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
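
/* For example, assuming vect_pow2 (x) computes 2**x: a two-step promotion
   (PWR == 1) is costed by the loop above as vect_pow2 (1) + vect_pow2 (2)
   = 6 vec_promote_demote stmts, while the corresponding two-step demotion
   is costed as vect_pow2 (0) + vect_pow2 (1) = 3.  */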

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
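
/* E.g. for a group of four interleaved stores, the first store of the
   group reports a group size of 4 and the other three report 1, so the
   group-wide overhead accounted for below is charged exactly once.  */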

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
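
/* Illustration of the permute accounting above: with group_size == 4 and
   ncopies == 1, nstmts = 1 * exact_log2 (4) * 4 = 8 vec_perm stmts, i.e.
   two rounds of high/low interleaving across the four vectors.  */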

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
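
/* Illustration of the strided-load accounting above: for a vectype with
   four subparts (e.g. V4SI) and ncopies == 2, the cost is 2 * 4 = 8
   scalar_load stmts plus 2 vec_construct stmts to assemble the vectors.  */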

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
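
/* Example with hypothetical names: for a four-element integer vector type
   held in V4SI_TYPE,

     tree t = vect_init_vector (stmt, build_int_cst (integer_type_node, 5),
				v4si_type, NULL);

   broadcasts the scalar 5 into the vector constant {5, 5, 5, 5}, emits the
   "cst_" initialization in the loop preheader (GSI being NULL), and returns
   the new SSA def in T.  */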

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def = ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, "  def_stmt = ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
1572
1573
d092494c
IR
1574/* Get vectorized definitions for OP0 and OP1.
1575 REDUC_INDEX is the index of the reduction operand in case of reduction,
1576 and -1 otherwise. */
ebfd146a 1577
d092494c 1578void
ebfd146a 1579vect_get_vec_defs (tree op0, tree op1, gimple stmt,
9771b263
DN
1580 vec<tree> *vec_oprnds0,
1581 vec<tree> *vec_oprnds1,
d092494c 1582 slp_tree slp_node, int reduc_index)
ebfd146a
IR
1583{
1584 if (slp_node)
d092494c
IR
1585 {
1586 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1587 auto_vec<tree> ops (nops);
1588 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1589
9771b263 1590 ops.quick_push (op0);
d092494c 1591 if (op1)
9771b263 1592 ops.quick_push (op1);
d092494c
IR
1593
1594 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1595
37b5ec8f 1596 *vec_oprnds0 = vec_defs[0];
d092494c 1597 if (op1)
37b5ec8f 1598 *vec_oprnds1 = vec_defs[1];
d092494c 1599 }
ebfd146a
IR
1600 else
1601 {
1602 tree vec_oprnd;
1603
9771b263 1604 vec_oprnds0->create (1);
b8698a0f 1605 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1606 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1607
1608 if (op1)
1609 {
9771b263 1610 vec_oprnds1->create (1);
b8698a0f 1611 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1612 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1613 }
1614 }
1615}
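
/* Usage sketch (illustrative): for a binary stmt, copy 0 typically
   fetches its defs with

       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                          slp_node, -1);

   and each later copy with

       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

   which replaces the single element of each operand vector in place
   (see e.g. the NONE case of vectorizable_conversion below).  */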
1616
1617
1618/* Function vect_finish_stmt_generation.
1619
1620 Insert a new stmt. */
1621
1622void
1623vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1624 gimple_stmt_iterator *gsi)
1625{
1626 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1627 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1628 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a
IR
1629
1630 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1631
54e8e2c3
RG
1632 if (!gsi_end_p (*gsi)
1633 && gimple_has_mem_ops (vec_stmt))
1634 {
1635 gimple at_stmt = gsi_stmt (*gsi);
1636 tree vuse = gimple_vuse (at_stmt);
1637 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1638 {
1639 tree vdef = gimple_vdef (at_stmt);
1640 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1641 /* If we have an SSA vuse and insert a store, update virtual
1642 SSA form to avoid triggering the renamer. Do so only
1643 if we can easily see all uses - which is what almost always
1644 happens with the way vectorized stmts are inserted. */
1645 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1646 && ((is_gimple_assign (vec_stmt)
1647 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1648 || (is_gimple_call (vec_stmt)
1649 && !(gimple_call_flags (vec_stmt)
1650 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1651 {
1652 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1653 gimple_set_vdef (vec_stmt, new_vdef);
1654 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1655 }
1656 }
1657 }
ebfd146a
IR
1658 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1659
b8698a0f 1660 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1661 bb_vinfo));
ebfd146a 1662
73fbfcad 1663 if (dump_enabled_p ())
ebfd146a 1664 {
78c60e3d
SS
1665 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
e645e942 1667 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1668 }
1669
ad885386 1670 gimple_set_location (vec_stmt, gimple_location (stmt));
ebfd146a
IR
1671}
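
/* The creation pattern used throughout this file (shown for an
   assignment; calls use gimple_call_set_lhs instead) is:

       new_stmt = gimple_build_assign (vec_dest, rhs);
       new_temp = make_ssa_name (vec_dest, new_stmt);
       gimple_assign_set_lhs (new_stmt, new_temp);
       vect_finish_stmt_generation (stmt, new_stmt, gsi);

   i.e. vect_finish_stmt_generation is the final step, inserting
   NEW_STMT before *GSI and recording its stmt_vec_info.  */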
1672
1673/* Checks if CALL can be vectorized in type VECTYPE. Returns
1674 a function declaration if the target has a vectorized version
1675 of the function, or NULL_TREE if the function cannot be vectorized. */
1676
1677tree
1678vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1679{
1680 tree fndecl = gimple_call_fndecl (call);
ebfd146a
IR
1681
1682 /* We only handle functions that do not read or clobber memory -- i.e.
1683 const or novops ones. */
1684 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1685 return NULL_TREE;
1686
1687 if (!fndecl
1688 || TREE_CODE (fndecl) != FUNCTION_DECL
1689 || !DECL_BUILT_IN (fndecl))
1690 return NULL_TREE;
1691
62f7fd21 1692 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
ebfd146a
IR
1693 vectype_in);
1694}
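
/* For example (illustrative; support is entirely target-dependent):
   for a call  y = sqrtf (x)  vectorized with V4SF in and out,

       targetm.vectorize.builtin_vectorized_function (fndecl,
                                                      vectype_out,
                                                      vectype_in)

   may return the decl of a vector sqrt builtin, or NULL_TREE when no
   vector variant of FNDECL exists for these types.  */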
1695
1696/* Function vectorizable_call.
1697
b8698a0f
L
1698 Check if STMT performs a function call that can be vectorized.
1699 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1700 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1701 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1702
1703static bool
190c2236
JJ
1704vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1705 slp_tree slp_node)
ebfd146a
IR
1706{
1707 tree vec_dest;
1708 tree scalar_dest;
1709 tree op, type;
1710 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1712 tree vectype_out, vectype_in;
1713 int nunits_in;
1714 int nunits_out;
1715 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 1716 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 1717 tree fndecl, new_temp, def, rhs_type;
ebfd146a 1718 gimple def_stmt;
0502fb85
UB
1719 enum vect_def_type dt[3]
1720 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 1721 gimple new_stmt = NULL;
ebfd146a 1722 int ncopies, j;
6e1aa848 1723 vec<tree> vargs = vNULL;
ebfd146a
IR
1724 enum { NARROW, NONE, WIDEN } modifier;
1725 size_t i, nargs;
9d5e7640 1726 tree lhs;
ebfd146a 1727
190c2236 1728 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
1729 return false;
1730
8644a673 1731 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1732 return false;
1733
ebfd146a
IR
1734 /* Is STMT a vectorizable call? */
1735 if (!is_gimple_call (stmt))
1736 return false;
1737
1738 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1739 return false;
1740
822ba6d7 1741 if (stmt_can_throw_internal (stmt))
5a2c1986
IR
1742 return false;
1743
b690cc0f
RG
1744 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1745
ebfd146a
IR
1746 /* Process function arguments. */
1747 rhs_type = NULL_TREE;
b690cc0f 1748 vectype_in = NULL_TREE;
ebfd146a
IR
1749 nargs = gimple_call_num_args (stmt);
1750
1b1562a5
MM
1751 /* Bail out if the function has more than three arguments; we do not have
1752 interesting builtin functions to vectorize with more than two arguments
1753 except for fma. Zero arguments is not supported either. */
1754 if (nargs == 0 || nargs > 3)
ebfd146a
IR
1755 return false;
1756
74bf76ed
JJ
1757 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
1758 if (gimple_call_internal_p (stmt)
1759 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1760 {
1761 nargs = 0;
1762 rhs_type = unsigned_type_node;
1763 }
1764
ebfd146a
IR
1765 for (i = 0; i < nargs; i++)
1766 {
b690cc0f
RG
1767 tree opvectype;
1768
ebfd146a
IR
1769 op = gimple_call_arg (stmt, i);
1770
1771 /* We can only handle calls with arguments of the same type. */
1772 if (rhs_type
8533c9d8 1773 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 1774 {
73fbfcad 1775 if (dump_enabled_p ())
78c60e3d 1776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1777 "argument types differ.\n");
ebfd146a
IR
1778 return false;
1779 }
b690cc0f
RG
1780 if (!rhs_type)
1781 rhs_type = TREE_TYPE (op);
ebfd146a 1782
24ee1384 1783 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 1784 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 1785 {
73fbfcad 1786 if (dump_enabled_p ())
78c60e3d 1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1788 "use not simple.\n");
ebfd146a
IR
1789 return false;
1790 }
ebfd146a 1791
b690cc0f
RG
1792 if (!vectype_in)
1793 vectype_in = opvectype;
1794 else if (opvectype
1795 && opvectype != vectype_in)
1796 {
73fbfcad 1797 if (dump_enabled_p ())
78c60e3d 1798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1799 "argument vector types differ.\n");
b690cc0f
RG
1800 return false;
1801 }
1802 }
1803 /* If all arguments are external or constant defs use a vector type with
1804 the same size as the output vector type. */
ebfd146a 1805 if (!vectype_in)
b690cc0f 1806 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
1807 if (vec_stmt)
1808 gcc_assert (vectype_in);
1809 if (!vectype_in)
1810 {
73fbfcad 1811 if (dump_enabled_p ())
7d8930a0 1812 {
78c60e3d
SS
1813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1814 "no vectype for scalar type ");
1815 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 1816 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
1817 }
1818
1819 return false;
1820 }
ebfd146a
IR
1821
1822 /* FORNOW */
b690cc0f
RG
1823 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1824 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
1825 if (nunits_in == nunits_out / 2)
1826 modifier = NARROW;
1827 else if (nunits_out == nunits_in)
1828 modifier = NONE;
1829 else if (nunits_out == nunits_in / 2)
1830 modifier = WIDEN;
1831 else
1832 return false;
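
  /* Illustrative numbers: a call consuming V2DF (nunits_in = 2) and
     producing V4SI (nunits_out = 4) satisfies nunits_in == nunits_out / 2
     and is classified NARROW, so two input vectors are consumed per
     call below; equal counts give NONE, and nunits_out == nunits_in / 2
     gives WIDEN (which no target implements yet, see below).  */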
1833
1834 /* For now, we only vectorize functions if a target-specific builtin
1835 is available. TODO -- in some cases, it might be profitable to
1836 insert the calls for pieces of the vector, in order to be able
1837 to vectorize other operations in the loop. */
1838 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1839 if (fndecl == NULL_TREE)
1840 {
74bf76ed
JJ
1841 if (gimple_call_internal_p (stmt)
1842 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1843 && !slp_node
1844 && loop_vinfo
1845 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1846 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
1847 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1848 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
1849 {
1850 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1851 { 0, 1, 2, ... vf - 1 } vector. */
1852 gcc_assert (nargs == 0);
1853 }
1854 else
1855 {
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1858 "function is not vectorizable.\n");
74bf76ed
JJ
1859 return false;
1860 }
ebfd146a
IR
1861 }
1862
5006671f 1863 gcc_assert (!gimple_vuse (stmt));
ebfd146a 1864
190c2236
JJ
1865 if (slp_node || PURE_SLP_STMT (stmt_info))
1866 ncopies = 1;
1867 else if (modifier == NARROW)
ebfd146a
IR
1868 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1869 else
1870 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1871
1872 /* Sanity check: make sure that at least one copy of the vectorized stmt
1873 needs to be generated. */
1874 gcc_assert (ncopies >= 1);
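
  /* E.g. (illustrative): with a vectorization factor of 8, a NONE call
     whose input vectors hold 4 elements needs ncopies = 8 / 4 = 2,
     while a NARROW call with 8-element output vectors needs
     ncopies = 8 / 8 = 1.  */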
1875
1876 if (!vec_stmt) /* transformation not required. */
1877 {
1878 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 1879 if (dump_enabled_p ())
e645e942
TJ
1880 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
1881 "\n");
c3e7ee41 1882 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
1883 return true;
1884 }
1885
1886 /** Transform. **/
1887
73fbfcad 1888 if (dump_enabled_p ())
e645e942 1889 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
1890
1891 /* Handle def. */
1892 scalar_dest = gimple_call_lhs (stmt);
1893 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1894
1895 prev_stmt_info = NULL;
1896 switch (modifier)
1897 {
1898 case NONE:
1899 for (j = 0; j < ncopies; ++j)
1900 {
1901 /* Build argument list for the vectorized call. */
1902 if (j == 0)
9771b263 1903 vargs.create (nargs);
ebfd146a 1904 else
9771b263 1905 vargs.truncate (0);
ebfd146a 1906
190c2236
JJ
1907 if (slp_node)
1908 {
ef062b13 1909 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 1910 vec<tree> vec_oprnds0;
190c2236
JJ
1911
1912 for (i = 0; i < nargs; i++)
9771b263 1913 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 1914 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 1915 vec_oprnds0 = vec_defs[0];
190c2236
JJ
1916
1917 /* Arguments are ready. Create the new vector stmt. */
9771b263 1918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
1919 {
1920 size_t k;
1921 for (k = 0; k < nargs; k++)
1922 {
37b5ec8f 1923 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 1924 vargs[k] = vec_oprndsk[i];
190c2236
JJ
1925 }
1926 new_stmt = gimple_build_call_vec (fndecl, vargs);
1927 new_temp = make_ssa_name (vec_dest, new_stmt);
1928 gimple_call_set_lhs (new_stmt, new_temp);
1929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 1930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
1931 }
1932
1933 for (i = 0; i < nargs; i++)
1934 {
37b5ec8f 1935 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 1936 vec_oprndsi.release ();
190c2236 1937 }
190c2236
JJ
1938 continue;
1939 }
1940
ebfd146a
IR
1941 for (i = 0; i < nargs; i++)
1942 {
1943 op = gimple_call_arg (stmt, i);
1944 if (j == 0)
1945 vec_oprnd0
1946 = vect_get_vec_def_for_operand (op, stmt, NULL);
1947 else
63827fb8
IR
1948 {
1949 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1950 vec_oprnd0
1951 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1952 }
ebfd146a 1953
9771b263 1954 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
1955 }
1956
74bf76ed
JJ
1957 if (gimple_call_internal_p (stmt)
1958 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1959 {
1960 tree *v = XALLOCAVEC (tree, nunits_out);
1961 int k;
1962 for (k = 0; k < nunits_out; ++k)
1963 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
1964 tree cst = build_vector (vectype_out, v);
1965 tree new_var
1966 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
1967 gimple init_stmt = gimple_build_assign (new_var, cst);
1968 new_temp = make_ssa_name (new_var, init_stmt);
1969 gimple_assign_set_lhs (init_stmt, new_temp);
1970 vect_init_vector_1 (stmt, init_stmt, NULL);
1971 new_temp = make_ssa_name (vec_dest, NULL);
1972 new_stmt = gimple_build_assign (new_temp,
1973 gimple_assign_lhs (init_stmt));
1974 }
1975 else
1976 {
1977 new_stmt = gimple_build_call_vec (fndecl, vargs);
1978 new_temp = make_ssa_name (vec_dest, new_stmt);
1979 gimple_call_set_lhs (new_stmt, new_temp);
1980 }
ebfd146a
IR
1981 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1982
1983 if (j == 0)
1984 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1985 else
1986 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1987
1988 prev_stmt_info = vinfo_for_stmt (new_stmt);
1989 }
1990
1991 break;
1992
1993 case NARROW:
1994 for (j = 0; j < ncopies; ++j)
1995 {
1996 /* Build argument list for the vectorized call. */
1997 if (j == 0)
9771b263 1998 vargs.create (nargs * 2);
ebfd146a 1999 else
9771b263 2000 vargs.truncate (0);
ebfd146a 2001
190c2236
JJ
2002 if (slp_node)
2003 {
ef062b13 2004 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2005 vec<tree> vec_oprnds0;
190c2236
JJ
2006
2007 for (i = 0; i < nargs; i++)
9771b263 2008 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2009 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2010 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2011
2012 /* Arguments are ready. Create the new vector stmt. */
9771b263 2013 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
2014 {
2015 size_t k;
9771b263 2016 vargs.truncate (0);
190c2236
JJ
2017 for (k = 0; k < nargs; k++)
2018 {
37b5ec8f 2019 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
2020 vargs.quick_push (vec_oprndsk[i]);
2021 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236
JJ
2022 }
2023 new_stmt = gimple_build_call_vec (fndecl, vargs);
2024 new_temp = make_ssa_name (vec_dest, new_stmt);
2025 gimple_call_set_lhs (new_stmt, new_temp);
2026 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2027 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2028 }
2029
2030 for (i = 0; i < nargs; i++)
2031 {
37b5ec8f 2032 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2033 vec_oprndsi.release ();
190c2236 2034 }
190c2236
JJ
2035 continue;
2036 }
2037
ebfd146a
IR
2038 for (i = 0; i < nargs; i++)
2039 {
2040 op = gimple_call_arg (stmt, i);
2041 if (j == 0)
2042 {
2043 vec_oprnd0
2044 = vect_get_vec_def_for_operand (op, stmt, NULL);
2045 vec_oprnd1
63827fb8 2046 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2047 }
2048 else
2049 {
336ecb65 2050 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2051 vec_oprnd0
63827fb8 2052 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2053 vec_oprnd1
63827fb8 2054 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2055 }
2056
9771b263
DN
2057 vargs.quick_push (vec_oprnd0);
2058 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
2059 }
2060
2061 new_stmt = gimple_build_call_vec (fndecl, vargs);
2062 new_temp = make_ssa_name (vec_dest, new_stmt);
2063 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
2064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2065
2066 if (j == 0)
2067 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2068 else
2069 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2070
2071 prev_stmt_info = vinfo_for_stmt (new_stmt);
2072 }
2073
2074 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2075
2076 break;
2077
2078 case WIDEN:
2079 /* No current target implements this case. */
2080 return false;
2081 }
2082
9771b263 2083 vargs.release ();
ebfd146a
IR
2084
2085 /* Update the exception handling table with the vector stmt if necessary. */
2086 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2087 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2088
2089 /* The call in STMT might prevent it from being removed in DCE.
2090 However, we cannot remove it here, due to the way the SSA name
2091 it defines is mapped to the new definition. So just replace
2092 the rhs of the statement with something harmless. */
2093
dd34c087
JJ
2094 if (slp_node)
2095 return true;
2096
ebfd146a 2097 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
2098 if (is_pattern_stmt_p (stmt_info))
2099 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2100 else
2101 lhs = gimple_call_lhs (stmt);
2102 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2103 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2104 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
2105 STMT_VINFO_STMT (stmt_info) = new_stmt;
2106 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
2107
2108 return true;
2109}
2110
2111
2112/* Function vect_gen_widened_results_half
2113
2114 Create a vector stmt whose code, number of arguments, and result
2115 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2116 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
ebfd146a
IR
2117 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2118 needs to be created (DECL is a function-decl of a target-builtin).
2119 STMT is the original scalar stmt that we are vectorizing. */
2120
2121static gimple
2122vect_gen_widened_results_half (enum tree_code code,
2123 tree decl,
2124 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2125 tree vec_dest, gimple_stmt_iterator *gsi,
2126 gimple stmt)
b8698a0f 2127{
ebfd146a 2128 gimple new_stmt;
b8698a0f
L
2129 tree new_temp;
2130
2131 /* Generate half of the widened result: */
2132 if (code == CALL_EXPR)
2133 {
2134 /* Target specific support */
ebfd146a
IR
2135 if (op_type == binary_op)
2136 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2137 else
2138 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2139 new_temp = make_ssa_name (vec_dest, new_stmt);
2140 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
2141 }
2142 else
ebfd146a 2143 {
b8698a0f
L
2144 /* Generic support */
2145 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
2146 if (op_type != binary_op)
2147 vec_oprnd1 = NULL;
2148 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2149 vec_oprnd1);
2150 new_temp = make_ssa_name (vec_dest, new_stmt);
2151 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 2152 }
ebfd146a
IR
2153 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2154
ebfd146a
IR
2155 return new_stmt;
2156}
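
/* Illustrative example (assumed types; lane selection is endian- and
   target-dependent): a widening multiply of two V8HI operands into
   V4SI results is emitted as two such halves,

       vz_lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;
       vz_hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;

   each computing four of the eight 32-bit products.  */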
2157
4a00c761
JJ
2158
2159/* Get vectorized definitions for loop-based vectorization. For the first
2160 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2161 the scalar operand), and for the rest we get a copy with
2162 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2163 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2164 The vectors are collected into VEC_OPRNDS. */
2165
2166static void
2167vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
9771b263 2168 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
2169{
2170 tree vec_oprnd;
2171
2172 /* Get the first vector operand. All the vector operands except the
2173 very first one (that is, the scalar oprnd) are stmt copies. */
2175 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2176 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2177 else
2178 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2179
9771b263 2180 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
2181
2182 /* Get second vector operand. */
2183 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 2184 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
2185
2186 *oprnd = vec_oprnd;
2187
2188 /* For conversion in multiple steps, continue to get operands
2189 recursively. */
2190 if (multi_step_cvt)
2191 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2192}
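
/* Worked count (illustrative): for a two-step narrowing chain
   (MULTI_STEP_CVT == 1) the caller in vectorizable_conversion passes
   vect_pow2 (1) - 1 == 1, so this routine runs twice and collects
   2 * 2 = 4 defs in VEC_OPRNDS -- exactly the number of source vectors
   needed to pack down to one final vector in two steps.  */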
2193
2194
2195/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2196 For multi-step conversions store the resulting vectors and call the function
2197 recursively. */
2198
2199static void
9771b263 2200vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4a00c761 2201 int multi_step_cvt, gimple stmt,
9771b263 2202 vec<tree> vec_dsts,
4a00c761
JJ
2203 gimple_stmt_iterator *gsi,
2204 slp_tree slp_node, enum tree_code code,
2205 stmt_vec_info *prev_stmt_info)
2206{
2207 unsigned int i;
2208 tree vop0, vop1, new_tmp, vec_dest;
2209 gimple new_stmt;
2210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2211
9771b263 2212 vec_dest = vec_dsts.pop ();
4a00c761 2213
9771b263 2214 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
2215 {
2216 /* Create demotion operation. */
9771b263
DN
2217 vop0 = (*vec_oprnds)[i];
2218 vop1 = (*vec_oprnds)[i + 1];
4a00c761
JJ
2219 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2220 new_tmp = make_ssa_name (vec_dest, new_stmt);
2221 gimple_assign_set_lhs (new_stmt, new_tmp);
2222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2223
2224 if (multi_step_cvt)
2225 /* Store the resulting vector for next recursive call. */
9771b263 2226 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
2227 else
2228 {
2229 /* This is the last step of the conversion sequence. Store the
2230 vectors in SLP_NODE or in vector info of the scalar statement
2231 (or in STMT_VINFO_RELATED_STMT chain). */
2232 if (slp_node)
9771b263 2233 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
2234 else
2235 {
2236 if (!*prev_stmt_info)
2237 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2238 else
2239 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2240
2241 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2242 }
2243 }
2244 }
2245
2246 /* For multi-step demotion operations we first generate demotion operations
2247 from the source type to the intermediate types, and then combine the
2248 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2249 type. */
2250 if (multi_step_cvt)
2251 {
2252 /* At each level of recursion we have half of the operands we had at the
2253 previous level. */
9771b263 2254 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
2255 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2256 stmt, vec_dsts, gsi, slp_node,
2257 VEC_PACK_TRUNC_EXPR,
2258 prev_stmt_info);
2259 }
2260
9771b263 2261 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
2262}
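
/* Shape of the recursion (illustrative, assuming int -> short -> char):
   four V4SI inputs are packed pairwise into two V8HI vectors at the
   first level, and the recursive call packs those into one V16QI
   result; VEC_OPRNDS is truncated to half its length at each level.  */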
2263
2264
2265/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2266 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2267 the resulting vectors and call the function recursively. */
2268
2269static void
9771b263
DN
2270vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2271 vec<tree> *vec_oprnds1,
4a00c761
JJ
2272 gimple stmt, tree vec_dest,
2273 gimple_stmt_iterator *gsi,
2274 enum tree_code code1,
2275 enum tree_code code2, tree decl1,
2276 tree decl2, int op_type)
2277{
2278 int i;
2279 tree vop0, vop1, new_tmp1, new_tmp2;
2280 gimple new_stmt1, new_stmt2;
6e1aa848 2281 vec<tree> vec_tmp = vNULL;
4a00c761 2282
9771b263
DN
2283 vec_tmp.create (vec_oprnds0->length () * 2);
2284 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
2285 {
2286 if (op_type == binary_op)
9771b263 2287 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
2288 else
2289 vop1 = NULL_TREE;
2290
2291 /* Generate the two halves of promotion operation. */
2292 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2293 op_type, vec_dest, gsi, stmt);
2294 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2295 op_type, vec_dest, gsi, stmt);
2296 if (is_gimple_call (new_stmt1))
2297 {
2298 new_tmp1 = gimple_call_lhs (new_stmt1);
2299 new_tmp2 = gimple_call_lhs (new_stmt2);
2300 }
2301 else
2302 {
2303 new_tmp1 = gimple_assign_lhs (new_stmt1);
2304 new_tmp2 = gimple_assign_lhs (new_stmt2);
2305 }
2306
2307 /* Store the results for the next step. */
9771b263
DN
2308 vec_tmp.quick_push (new_tmp1);
2309 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
2310 }
2311
689eaba3 2312 vec_oprnds0->release ();
4a00c761
JJ
2313 *vec_oprnds0 = vec_tmp;
2314}
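
/* Illustration (assumed types): promoting V8HI operands to V4SI, each
   input vector yields a LO and a HI half, so VEC_OPRNDS0 doubles in
   length on every invocation; vectorizable_conversion calls this once
   per level of a multi-step promotion.  */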
2315
2316
b8698a0f
L
2317/* Check if STMT performs a conversion operation, that can be vectorized.
2318 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 2319 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
2320 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2321
2322static bool
2323vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2324 gimple *vec_stmt, slp_tree slp_node)
2325{
2326 tree vec_dest;
2327 tree scalar_dest;
4a00c761 2328 tree op0, op1 = NULL_TREE;
ebfd146a
IR
2329 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2330 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2331 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2332 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 2333 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
2334 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2335 tree new_temp;
2336 tree def;
2337 gimple def_stmt;
2338 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2339 gimple new_stmt = NULL;
2340 stmt_vec_info prev_stmt_info;
2341 int nunits_in;
2342 int nunits_out;
2343 tree vectype_out, vectype_in;
4a00c761
JJ
2344 int ncopies, i, j;
2345 tree lhs_type, rhs_type;
ebfd146a 2346 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
2347 vec<tree> vec_oprnds0 = vNULL;
2348 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 2349 tree vop0;
4a00c761
JJ
2350 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2351 int multi_step_cvt = 0;
6e1aa848
DN
2352 vec<tree> vec_dsts = vNULL;
2353 vec<tree> interm_types = vNULL;
4a00c761
JJ
2354 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2355 int op_type;
2356 enum machine_mode rhs_mode;
2357 unsigned short fltsz;
ebfd146a
IR
2358
2359 /* Is STMT a vectorizable conversion? */
2360
4a00c761 2361 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2362 return false;
2363
8644a673 2364 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2365 return false;
2366
2367 if (!is_gimple_assign (stmt))
2368 return false;
2369
2370 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2371 return false;
2372
2373 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
2374 if (!CONVERT_EXPR_CODE_P (code)
2375 && code != FIX_TRUNC_EXPR
2376 && code != FLOAT_EXPR
2377 && code != WIDEN_MULT_EXPR
2378 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
2379 return false;
2380
4a00c761
JJ
2381 op_type = TREE_CODE_LENGTH (code);
2382
ebfd146a 2383 /* Check types of lhs and rhs. */
b690cc0f 2384 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 2385 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
2386 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2387
ebfd146a
IR
2388 op0 = gimple_assign_rhs1 (stmt);
2389 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
2390
2391 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2392 && !((INTEGRAL_TYPE_P (lhs_type)
2393 && INTEGRAL_TYPE_P (rhs_type))
2394 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2395 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2396 return false;
2397
2398 if ((INTEGRAL_TYPE_P (lhs_type)
2399 && (TYPE_PRECISION (lhs_type)
2400 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2401 || (INTEGRAL_TYPE_P (rhs_type)
2402 && (TYPE_PRECISION (rhs_type)
2403 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2404 {
73fbfcad 2405 if (dump_enabled_p ())
78c60e3d 2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
2407 "type conversion to/from bit-precision unsupported."
2408 "\n");
4a00c761
JJ
2409 return false;
2410 }
2411
b690cc0f 2412 /* Check the operands of the operation. */
24ee1384 2413 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f
RG
2414 &def_stmt, &def, &dt[0], &vectype_in))
2415 {
73fbfcad 2416 if (dump_enabled_p ())
78c60e3d 2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2418 "use not simple.\n");
b690cc0f
RG
2419 return false;
2420 }
4a00c761
JJ
2421 if (op_type == binary_op)
2422 {
2423 bool ok;
2424
2425 op1 = gimple_assign_rhs2 (stmt);
2426 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2427 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2428 OP1. */
2429 if (CONSTANT_CLASS_P (op0))
f5709183 2430 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
4a00c761
JJ
2431 &def_stmt, &def, &dt[1], &vectype_in);
2432 else
f5709183 2433 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 2434 &def, &dt[1]);
4a00c761
JJ
2435
2436 if (!ok)
2437 {
73fbfcad 2438 if (dump_enabled_p ())
78c60e3d 2439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2440 "use not simple.\n");
4a00c761
JJ
2441 return false;
2442 }
2443 }
2444
b690cc0f
RG
2445 /* If op0 is an external or constant defs use a vector type of
2446 the same size as the output vector type. */
ebfd146a 2447 if (!vectype_in)
b690cc0f 2448 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2449 if (vec_stmt)
2450 gcc_assert (vectype_in);
2451 if (!vectype_in)
2452 {
73fbfcad 2453 if (dump_enabled_p ())
4a00c761 2454 {
78c60e3d
SS
2455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 "no vectype for scalar type ");
2457 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2458 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 2459 }
7d8930a0
IR
2460
2461 return false;
2462 }
ebfd146a 2463
b690cc0f
RG
2464 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2465 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 2466 if (nunits_in < nunits_out)
ebfd146a
IR
2467 modifier = NARROW;
2468 else if (nunits_out == nunits_in)
2469 modifier = NONE;
ebfd146a 2470 else
4a00c761 2471 modifier = WIDEN;
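
  /* Illustrative classification: converting short -> int (V8HI in,
     V4SI out) gives nunits_in = 8 > nunits_out = 4, i.e. WIDEN (a
     promotion), while int -> short is the NARROW (demotion) case; note
     the sense follows the element width, not the element count.  */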
ebfd146a 2472
ff802fa1
IR
2473 /* Multiple types in SLP are handled by creating the appropriate number of
2474 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2475 case of SLP. */
437f4a00 2476 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a 2477 ncopies = 1;
4a00c761
JJ
2478 else if (modifier == NARROW)
2479 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2480 else
2481 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 2482
ebfd146a
IR
2483 /* Sanity check: make sure that at least one copy of the vectorized stmt
2484 needs to be generated. */
2485 gcc_assert (ncopies >= 1);
2486
ebfd146a 2487 /* Supportable by target? */
4a00c761 2488 switch (modifier)
ebfd146a 2489 {
4a00c761
JJ
2490 case NONE:
2491 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2492 return false;
2493 if (supportable_convert_operation (code, vectype_out, vectype_in,
2494 &decl1, &code1))
2495 break;
2496 /* FALLTHRU */
2497 unsupported:
73fbfcad 2498 if (dump_enabled_p ())
78c60e3d 2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2500 "conversion not supported by target.\n");
ebfd146a 2501 return false;
ebfd146a 2502
4a00c761
JJ
2503 case WIDEN:
2504 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
2505 &code1, &code2, &multi_step_cvt,
2506 &interm_types))
4a00c761
JJ
2507 {
2508 /* Binary widening operation can only be supported directly by the
2509 architecture. */
2510 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2511 break;
2512 }
2513
2514 if (code != FLOAT_EXPR
2515 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2516 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2517 goto unsupported;
2518
2519 rhs_mode = TYPE_MODE (rhs_type);
2520 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2521 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2522 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2523 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2524 {
2525 cvt_type
2526 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2527 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2528 if (cvt_type == NULL_TREE)
2529 goto unsupported;
2530
2531 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2532 {
2533 if (!supportable_convert_operation (code, vectype_out,
2534 cvt_type, &decl1, &codecvt1))
2535 goto unsupported;
2536 }
2537 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
2538 cvt_type, &codecvt1,
2539 &codecvt2, &multi_step_cvt,
4a00c761
JJ
2540 &interm_types))
2541 continue;
2542 else
2543 gcc_assert (multi_step_cvt == 0);
2544
2545 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
2546 vectype_in, &code1, &code2,
2547 &multi_step_cvt, &interm_types))
4a00c761
JJ
2548 break;
2549 }
2550
2551 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2552 goto unsupported;
2553
2554 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2555 codecvt2 = ERROR_MARK;
2556 else
2557 {
2558 multi_step_cvt++;
9771b263 2559 interm_types.safe_push (cvt_type);
4a00c761
JJ
2560 cvt_type = NULL_TREE;
2561 }
2562 break;
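
      /* Worked example (a sketch; actual support is target-dependent):
         for FLOAT_EXPR from short to double there is usually no single
         widening conversion, so the loop above tries successively wider
         integer modes: with cvt_type = int it checks that short -> int
         is a supportable widening NOP (code1/code2) and that
         int -> double is a supportable FLOAT (codecvt1/codecvt2),
         recording the intermediate int vectype in INTERM_TYPES.  */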
2563
2564 case NARROW:
2565 gcc_assert (op_type == unary_op);
2566 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2567 &code1, &multi_step_cvt,
2568 &interm_types))
2569 break;
2570
2571 if (code != FIX_TRUNC_EXPR
2572 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2573 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2574 goto unsupported;
2575
2576 rhs_mode = TYPE_MODE (rhs_type);
2577 cvt_type
2578 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2579 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2580 if (cvt_type == NULL_TREE)
2581 goto unsupported;
2582 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2583 &decl1, &codecvt1))
2584 goto unsupported;
2585 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2586 &code1, &multi_step_cvt,
2587 &interm_types))
2588 break;
2589 goto unsupported;
2590
2591 default:
2592 gcc_unreachable ();
ebfd146a
IR
2593 }
2594
2595 if (!vec_stmt) /* transformation not required. */
2596 {
73fbfcad 2597 if (dump_enabled_p ())
78c60e3d 2598 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2599 "=== vectorizable_conversion ===\n");
4a00c761 2600 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
2601 {
2602 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
c3e7ee41 2603 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
8bd37302 2604 }
4a00c761
JJ
2605 else if (modifier == NARROW)
2606 {
2607 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 2608 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
2609 }
2610 else
2611 {
2612 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 2613 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 2614 }
9771b263 2615 interm_types.release ();
ebfd146a
IR
2616 return true;
2617 }
2618
2619 /** Transform. **/
73fbfcad 2620 if (dump_enabled_p ())
78c60e3d 2621 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2622 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 2623
4a00c761
JJ
2624 if (op_type == binary_op)
2625 {
2626 if (CONSTANT_CLASS_P (op0))
2627 op0 = fold_convert (TREE_TYPE (op1), op0);
2628 else if (CONSTANT_CLASS_P (op1))
2629 op1 = fold_convert (TREE_TYPE (op0), op1);
2630 }
2631
2632 /* In case of multi-step conversion, we first generate conversion operations
2633 to the intermediate types, and then from those types to the final one.
2634 We create vector destinations for the intermediate types (TYPES) received
2635 from supportable_*_operation, and store them in the correct order
2636 for future use in vect_create_vectorized_*_stmts (). */
9771b263 2637 vec_dsts.create (multi_step_cvt + 1);
82294ec1
JJ
2638 vec_dest = vect_create_destination_var (scalar_dest,
2639 (cvt_type && modifier == WIDEN)
2640 ? cvt_type : vectype_out);
9771b263 2641 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
2642
2643 if (multi_step_cvt)
2644 {
9771b263
DN
2645 for (i = interm_types.length () - 1;
2646 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
2647 {
2648 vec_dest = vect_create_destination_var (scalar_dest,
2649 intermediate_type);
9771b263 2650 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
2651 }
2652 }
ebfd146a 2653
4a00c761 2654 if (cvt_type)
82294ec1
JJ
2655 vec_dest = vect_create_destination_var (scalar_dest,
2656 modifier == WIDEN
2657 ? vectype_out : cvt_type);
4a00c761
JJ
2658
2659 if (!slp_node)
2660 {
30862efc 2661 if (modifier == WIDEN)
4a00c761 2662 {
c3284718 2663 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 2664 if (op_type == binary_op)
9771b263 2665 vec_oprnds1.create (1);
4a00c761 2666 }
30862efc 2667 else if (modifier == NARROW)
9771b263
DN
2668 vec_oprnds0.create (
2669 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
2670 }
2671 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 2672 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 2673
4a00c761 2674 last_oprnd = op0;
ebfd146a
IR
2675 prev_stmt_info = NULL;
2676 switch (modifier)
2677 {
2678 case NONE:
2679 for (j = 0; j < ncopies; j++)
2680 {
ebfd146a 2681 if (j == 0)
d092494c
IR
2682 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2683 -1);
ebfd146a
IR
2684 else
2685 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2686
9771b263 2687 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
2688 {
2689 /* Arguments are ready. Create the new vector stmt. */
2690 if (code1 == CALL_EXPR)
2691 {
2692 new_stmt = gimple_build_call (decl1, 1, vop0);
2693 new_temp = make_ssa_name (vec_dest, new_stmt);
2694 gimple_call_set_lhs (new_stmt, new_temp);
2695 }
2696 else
2697 {
2698 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2699 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2700 vop0, NULL);
2701 new_temp = make_ssa_name (vec_dest, new_stmt);
2702 gimple_assign_set_lhs (new_stmt, new_temp);
2703 }
2704
2705 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2706 if (slp_node)
9771b263 2707 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
2708 }
2709
ebfd146a
IR
2710 if (j == 0)
2711 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2712 else
2713 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2714 prev_stmt_info = vinfo_for_stmt (new_stmt);
2715 }
2716 break;
2717
2718 case WIDEN:
2719 /* In case the vectorization factor (VF) is bigger than the number
2720 of elements that we can fit in a vectype (nunits), we have to
2721 generate more than one vector stmt, i.e., we need to "unroll"
2722 the vector stmt by a factor VF/nunits. */
2723 for (j = 0; j < ncopies; j++)
2724 {
4a00c761 2725 /* Handle uses. */
ebfd146a 2726 if (j == 0)
4a00c761
JJ
2727 {
2728 if (slp_node)
2729 {
2730 if (code == WIDEN_LSHIFT_EXPR)
2731 {
2732 unsigned int k;
ebfd146a 2733
4a00c761
JJ
2734 vec_oprnd1 = op1;
2735 /* Store vec_oprnd1 for every vector stmt to be created
2736 for SLP_NODE. We check during the analysis that all
2737 the shift arguments are the same. */
2738 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 2739 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
2740
2741 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2742 slp_node, -1);
2743 }
2744 else
2745 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2746 &vec_oprnds1, slp_node, -1);
2747 }
2748 else
2749 {
2750 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 2751 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
2752 if (op_type == binary_op)
2753 {
2754 if (code == WIDEN_LSHIFT_EXPR)
2755 vec_oprnd1 = op1;
2756 else
2757 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2758 NULL);
9771b263 2759 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
2760 }
2761 }
2762 }
ebfd146a 2763 else
4a00c761
JJ
2764 {
2765 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
2766 vec_oprnds0.truncate (0);
2767 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
2768 if (op_type == binary_op)
2769 {
2770 if (code == WIDEN_LSHIFT_EXPR)
2771 vec_oprnd1 = op1;
2772 else
2773 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2774 vec_oprnd1);
9771b263
DN
2775 vec_oprnds1.truncate (0);
2776 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
2777 }
2778 }
ebfd146a 2779
4a00c761
JJ
2780 /* Arguments are ready. Create the new vector stmts. */
2781 for (i = multi_step_cvt; i >= 0; i--)
2782 {
9771b263 2783 tree this_dest = vec_dsts[i];
4a00c761
JJ
2784 enum tree_code c1 = code1, c2 = code2;
2785 if (i == 0 && codecvt2 != ERROR_MARK)
2786 {
2787 c1 = codecvt1;
2788 c2 = codecvt2;
2789 }
2790 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2791 &vec_oprnds1,
2792 stmt, this_dest, gsi,
2793 c1, c2, decl1, decl2,
2794 op_type);
2795 }
2796
9771b263 2797 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
2798 {
2799 if (cvt_type)
2800 {
2801 if (codecvt1 == CALL_EXPR)
2802 {
2803 new_stmt = gimple_build_call (decl1, 1, vop0);
2804 new_temp = make_ssa_name (vec_dest, new_stmt);
2805 gimple_call_set_lhs (new_stmt, new_temp);
2806 }
2807 else
2808 {
2809 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2810 new_temp = make_ssa_name (vec_dest, NULL);
2811 new_stmt = gimple_build_assign_with_ops (codecvt1,
2812 new_temp,
2813 vop0, NULL);
2814 }
2815
2816 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2817 }
2818 else
2819 new_stmt = SSA_NAME_DEF_STMT (vop0);
2820
2821 if (slp_node)
9771b263 2822 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
2823 else
2824 {
2825 if (!prev_stmt_info)
2826 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2827 else
2828 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2829 prev_stmt_info = vinfo_for_stmt (new_stmt);
2830 }
2831 }
ebfd146a 2832 }
4a00c761
JJ
2833
2834 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
2835 break;
2836
2837 case NARROW:
2838 /* In case the vectorization factor (VF) is bigger than the number
2839 of elements that we can fit in a vectype (nunits), we have to
2840 generate more than one vector stmt, i.e., we need to "unroll"
2841 the vector stmt by a factor VF/nunits. */
2842 for (j = 0; j < ncopies; j++)
2843 {
2844 /* Handle uses. */
4a00c761
JJ
2845 if (slp_node)
2846 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2847 slp_node, -1);
ebfd146a
IR
2848 else
2849 {
9771b263 2850 vec_oprnds0.truncate (0);
4a00c761
JJ
2851 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2852 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
2853 }
2854
4a00c761
JJ
2855 /* Arguments are ready. Create the new vector stmts. */
2856 if (cvt_type)
9771b263 2857 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
2858 {
2859 if (codecvt1 == CALL_EXPR)
2860 {
2861 new_stmt = gimple_build_call (decl1, 1, vop0);
2862 new_temp = make_ssa_name (vec_dest, new_stmt);
2863 gimple_call_set_lhs (new_stmt, new_temp);
2864 }
2865 else
2866 {
2867 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2868 new_temp = make_ssa_name (vec_dest, NULL);
2869 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2870 vop0, NULL);
2871 }
ebfd146a 2872
4a00c761 2873 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2874 vec_oprnds0[i] = new_temp;
4a00c761 2875 }
ebfd146a 2876
4a00c761
JJ
2877 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2878 stmt, vec_dsts, gsi,
2879 slp_node, code1,
2880 &prev_stmt_info);
ebfd146a
IR
2881 }
2882
2883 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 2884 break;
ebfd146a
IR
2885 }
2886
9771b263
DN
2887 vec_oprnds0.release ();
2888 vec_oprnds1.release ();
2889 vec_dsts.release ();
2890 interm_types.release ();
ebfd146a
IR
2891
2892 return true;
2893}
ff802fa1
IR
2894
2895
ebfd146a
IR
2896/* Function vectorizable_assignment.
2897
b8698a0f
L
2898 Check if STMT performs an assignment (copy) that can be vectorized.
2899 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2900 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2901 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2902
2903static bool
2904vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2905 gimple *vec_stmt, slp_tree slp_node)
2906{
2907 tree vec_dest;
2908 tree scalar_dest;
2909 tree op;
2910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2911 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2912 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2913 tree new_temp;
2914 tree def;
2915 gimple def_stmt;
2916 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 2917 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 2918 int ncopies;
f18b55bd 2919 int i, j;
6e1aa848 2920 vec<tree> vec_oprnds = vNULL;
ebfd146a 2921 tree vop;
a70d6342 2922 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
2923 gimple new_stmt = NULL;
2924 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
2925 enum tree_code code;
2926 tree vectype_in;
ebfd146a
IR
2927
2928 /* Multiple types in SLP are handled by creating the appropriate number of
2929 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2930 case of SLP. */
437f4a00 2931 if (slp_node || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
2932 ncopies = 1;
2933 else
2934 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2935
2936 gcc_assert (ncopies >= 1);
ebfd146a 2937
a70d6342 2938 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2939 return false;
2940
8644a673 2941 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2942 return false;
2943
2944 /* Is vectorizable assignment? */
2945 if (!is_gimple_assign (stmt))
2946 return false;
2947
2948 scalar_dest = gimple_assign_lhs (stmt);
2949 if (TREE_CODE (scalar_dest) != SSA_NAME)
2950 return false;
2951
fde9c428 2952 code = gimple_assign_rhs_code (stmt);
ebfd146a 2953 if (gimple_assign_single_p (stmt)
fde9c428
RG
2954 || code == PAREN_EXPR
2955 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
2956 op = gimple_assign_rhs1 (stmt);
2957 else
2958 return false;
2959
7b7ec6c5
RG
2960 if (code == VIEW_CONVERT_EXPR)
2961 op = TREE_OPERAND (op, 0);
2962
24ee1384 2963 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
fde9c428 2964 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a 2965 {
73fbfcad 2966 if (dump_enabled_p ())
78c60e3d 2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2968 "use not simple.\n");
ebfd146a
IR
2969 return false;
2970 }
2971
fde9c428
RG
2972 /* We can handle NOP_EXPR conversions that do not change the number
2973 of elements or the vector size. */
7b7ec6c5
RG
2974 if ((CONVERT_EXPR_CODE_P (code)
2975 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
2976 && (!vectype_in
2977 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2978 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2979 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2980 return false;
2981
7b7b1813
RG
2982 /* We do not handle bit-precision changes. */
2983 if ((CONVERT_EXPR_CODE_P (code)
2984 || code == VIEW_CONVERT_EXPR)
2985 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2986 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2987 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2988 || ((TYPE_PRECISION (TREE_TYPE (op))
2989 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2990 /* But a conversion that does not change the bit-pattern is ok. */
2991 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2992 > TYPE_PRECISION (TREE_TYPE (op)))
2993 && TYPE_UNSIGNED (TREE_TYPE (op))))
2994 {
73fbfcad 2995 if (dump_enabled_p ())
78c60e3d
SS
2996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2997 "type conversion to/from bit-precision "
e645e942 2998 "unsupported.\n");
7b7b1813
RG
2999 return false;
3000 }
3001
ebfd146a
IR
3002 if (!vec_stmt) /* transformation not required. */
3003 {
3004 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 3005 if (dump_enabled_p ())
78c60e3d 3006 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3007 "=== vectorizable_assignment ===\n");
c3e7ee41 3008 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
3009 return true;
3010 }
3011
3012 /** Transform. **/
73fbfcad 3013 if (dump_enabled_p ())
e645e942 3014 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
3015
3016 /* Handle def. */
3017 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3018
3019 /* Handle use. */
f18b55bd 3020 for (j = 0; j < ncopies; j++)
ebfd146a 3021 {
f18b55bd
IR
3022 /* Handle uses. */
3023 if (j == 0)
d092494c 3024 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
f18b55bd
IR
3025 else
3026 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3027
3028 /* Arguments are ready. Create the new vector stmt. */
9771b263 3029 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 3030 {
7b7ec6c5
RG
3031 if (CONVERT_EXPR_CODE_P (code)
3032 || code == VIEW_CONVERT_EXPR)
4a73490d 3033 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
3034 new_stmt = gimple_build_assign (vec_dest, vop);
3035 new_temp = make_ssa_name (vec_dest, new_stmt);
3036 gimple_assign_set_lhs (new_stmt, new_temp);
3037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3038 if (slp_node)
9771b263 3039 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 3040 }
ebfd146a
IR
3041
3042 if (slp_node)
f18b55bd
IR
3043 continue;
3044
3045 if (j == 0)
3046 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3047 else
3048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3049
3050 prev_stmt_info = vinfo_for_stmt (new_stmt);
3051 }
b8698a0f 3052
9771b263 3053 vec_oprnds.release ();
ebfd146a
IR
3054 return true;
3055}
3056
9dc3f7de 3057
1107f3ae
IR
3058/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3059 either as a shift by a scalar or by a vector. */
3060
3061bool
3062vect_supportable_shift (enum tree_code code, tree scalar_type)
3063{
3064
3065 enum machine_mode vec_mode;
3066 optab optab;
3067 int icode;
3068 tree vectype;
3069
3070 vectype = get_vectype_for_scalar_type (scalar_type);
3071 if (!vectype)
3072 return false;
3073
3074 optab = optab_for_tree_code (code, vectype, optab_scalar);
3075 if (!optab
3076 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3077 {
3078 optab = optab_for_tree_code (code, vectype, optab_vector);
3079 if (!optab
3080 || (optab_handler (optab, TYPE_MODE (vectype))
3081 == CODE_FOR_nothing))
3082 return false;
3083 }
3084
3085 vec_mode = TYPE_MODE (vectype);
3086 icode = (int) optab_handler (optab, vec_mode);
3087 if (icode == CODE_FOR_nothing)
3088 return false;
3089
3090 return true;
3091}
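
/* Usage sketch (illustrative; the pattern recognizer is one such
   caller):

       if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
         ... emit a shift-based pattern stmt ...

   Accepting either optab is deliberate: a scalar shift amount can be
   broadcast to a vector if only the vector/vector form is supported.  */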
3092
3093
9dc3f7de
IR
3094/* Function vectorizable_shift.
3095
3096 Check if STMT performs a shift operation that can be vectorized.
3097 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3098 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3099 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3100
3101static bool
3102vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3103 gimple *vec_stmt, slp_tree slp_node)
3104{
3105 tree vec_dest;
3106 tree scalar_dest;
3107 tree op0, op1 = NULL;
3108 tree vec_oprnd1 = NULL_TREE;
3109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3110 tree vectype;
3111 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3112 enum tree_code code;
3113 enum machine_mode vec_mode;
3114 tree new_temp;
3115 optab optab;
3116 int icode;
3117 enum machine_mode optab_op2_mode;
3118 tree def;
3119 gimple def_stmt;
3120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3121 gimple new_stmt = NULL;
3122 stmt_vec_info prev_stmt_info;
3123 int nunits_in;
3124 int nunits_out;
3125 tree vectype_out;
cede2577 3126 tree op1_vectype;
9dc3f7de
IR
3127 int ncopies;
3128 int j, i;
6e1aa848
DN
3129 vec<tree> vec_oprnds0 = vNULL;
3130 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
3131 tree vop0, vop1;
3132 unsigned int k;
49eab32e 3133 bool scalar_shift_arg = true;
9dc3f7de
IR
3134 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3135 int vf;
3136
3137 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3138 return false;
3139
3140 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3141 return false;
3142
3143 /* Is STMT a vectorizable binary/unary operation? */
3144 if (!is_gimple_assign (stmt))
3145 return false;
3146
3147 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3148 return false;
3149
3150 code = gimple_assign_rhs_code (stmt);
3151
3152 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3153 || code == RROTATE_EXPR))
3154 return false;
3155
3156 scalar_dest = gimple_assign_lhs (stmt);
3157 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
7b7b1813
RG
3158 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3159 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3160 {
73fbfcad 3161 if (dump_enabled_p ())
78c60e3d 3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3163 "bit-precision shifts not supported.\n");
7b7b1813
RG
3164 return false;
3165 }
9dc3f7de
IR
3166
3167 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3168 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
9dc3f7de
IR
3169 &def_stmt, &def, &dt[0], &vectype))
3170 {
73fbfcad 3171 if (dump_enabled_p ())
78c60e3d 3172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3173 "use not simple.\n");
9dc3f7de
IR
3174 return false;
3175 }
3176 /* If op0 is an external or constant def use a vector type with
3177 the same size as the output vector type. */
3178 if (!vectype)
3179 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3180 if (vec_stmt)
3181 gcc_assert (vectype);
3182 if (!vectype)
3183 {
73fbfcad 3184 if (dump_enabled_p ())
78c60e3d 3185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3186 "no vectype for scalar type\n");
9dc3f7de
IR
3187 return false;
3188 }
3189
3190 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3191 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3192 if (nunits_out != nunits_in)
3193 return false;
3194
3195 op1 = gimple_assign_rhs2 (stmt);
24ee1384
IR
3196 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3197 &def, &dt[1], &op1_vectype))
9dc3f7de 3198 {
73fbfcad 3199 if (dump_enabled_p ())
78c60e3d 3200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3201 "use not simple.\n");
9dc3f7de
IR
3202 return false;
3203 }
3204
3205 if (loop_vinfo)
3206 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3207 else
3208 vf = 1;
3209
3210 /* Multiple types in SLP are handled by creating the appropriate number of
3211 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3212 case of SLP. */
437f4a00 3213 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
3214 ncopies = 1;
3215 else
3216 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3217
3218 gcc_assert (ncopies >= 1);
3219
3220 /* Determine whether the shift amount is a vector or a scalar. If the
3221 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3222
49eab32e
JJ
3223 if (dt[1] == vect_internal_def && !slp_node)
3224 scalar_shift_arg = false;
3225 else if (dt[1] == vect_constant_def
3226 || dt[1] == vect_external_def
3227 || dt[1] == vect_internal_def)
3228 {
3229 /* In SLP, need to check whether the shift count is the same,
3230 in loops if it is a constant or invariant, it is always
3231 a scalar shift. */
3232 if (slp_node)
3233 {
9771b263 3234 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
49eab32e
JJ
3235 gimple slpstmt;
3236
9771b263 3237 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
3238 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3239 scalar_shift_arg = false;
3240 }
3241 }
3242 else
3243 {
73fbfcad 3244 if (dump_enabled_p ())
78c60e3d 3245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3246 "operand mode requires invariant argument.\n");
49eab32e
JJ
3247 return false;
3248 }
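
  /* Illustrative examples: in "a[i] = b[i] << 3" or "a[i] = b[i] << s"
     with s loop-invariant, the shift amount is a scalar; in
     "a[i] = b[i] << c[i]" it is a vector and the vector/vector form
     below is required.  */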

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.\n");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.\n");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.\n");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }
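
  /* At this point OPTAB, if non-null, refers either to a vector-by-scalar
     or to a vector-by-vector shift pattern.  Illustrative example: for a
     vector of long long shifted by the int constant 3, OP1 was
     fold_convert'ed to long long above so that the element modes agree
     before the count is broadcast (or used directly by a scalar-count
     pattern).  */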

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or
                 loop invariant op1 directly, without extending it to vector
                 mode first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.\n");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kinds of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
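
  /* E.g. "q = p p+ 16" is vectorized as an ordinary vector PLUS_EXPR;
     there is no vector counterpart of POINTER_PLUS_EXPR.  */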

  /* Support only unary, binary and ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }
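
  /* E.g. NEGATE_EXPR is unary, PLUS_EXPR binary and FMA_EXPR ternary,
     which is why three def-types are tracked in DT above.  */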

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are the bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
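  /* There is no single insn code for MULT_HIGHPART_EXPR; LAST_INSN_CODE
     merely records that the target can expand it (e.g. via widening
     multiplies plus a permute), while CODE_FOR_nothing feeds the
     unsupported-op path below.  */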
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
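
/* Note: BASE_MISALIGNED is set during analysis for bases whose alignment
   may legitimately be increased, e.g. a file-local array.  Setting
   DECL_USER_ALIGN keeps later passes from lowering the alignment again,
   so the vectorized accesses can rely on it.  */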


/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                            size_zero_node) < 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step for store.\n");
      return false;
    }
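
  /* E.g. a store performed in a loop that counts downwards has a
     negative DR_STEP and is (FORNOW) rejected here.  */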

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.\n");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
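
  /* Illustrative example: a group of two interleaved int stores with
     V4SI vectors gives vec_num * nunits == 8, so AGGR_TYPE is int[8],
     written by a single IFN_STORE_LANES call per copy.  */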

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from the base address (the access
     of the first stmt in the chain, S2 in the above example) when the last
     store stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all
                 the stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is
                 then used as an input to vect_permute_store_chain(), and
                 OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
                 the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          NULL_TREE, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE and a permutation SEL, return the
   VECTOR_CST mask that implements the permutation of the vector
   elements.  If that is impossible to do, return NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}

/* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   return NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using the permutation mask MASK_VEC, insert them at *GSI and return
   the permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                            x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
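
/* VEC_PERM_EXPR semantics: element I of the result is X[MASK[I]] if
   MASK[I] < nunits and Y[MASK[I] - nunits] otherwise; e.g. the mask
   {3, 2, 1, 0} on a four-element X reverses it.  */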

/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
    }


  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
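
      /* Illustrative WIDEN example: nunits == 4 and gather_off_nunits == 8
         give sel == {4, 5, 6, 7, 4, 5, 6, 7}, so every odd copy re-permutes
         the offset vector to expose its high half; NARROW conversely
         combines two gather results into one data vector.  */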

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
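      /* For a floating-point MASKTYPE the all-ones bit pattern cannot be
         written as a simple constant, so it is reconstructed by decoding
         all-one target words via real_from_target; only the bit pattern
         of the mask matters to the gather insn.  */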
4682 mask = build_vector_from_val (masktype, mask);
4683 mask = vect_init_vector (stmt, mask, masktype, NULL);
4684
4685 scale = build_int_cst (scaletype, gather_scale);
4686
4687 prev_stmt_info = NULL;
4688 for (j = 0; j < ncopies; ++j)
4689 {
4690 if (modifier == WIDEN && (j & 1))
4691 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4692 perm_mask, stmt, gsi);
4693 else if (j == 0)
4694 op = vec_oprnd0
4695 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4696 else
4697 op = vec_oprnd0
4698 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4699
4700 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4701 {
4702 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4703 == TYPE_VECTOR_SUBPARTS (idxtype));
4704 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
aec7ae7d
JJ
4705 var = make_ssa_name (var, NULL);
4706 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4707 new_stmt
4708 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4709 op, NULL_TREE);
4710 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4711 op = var;
4712 }
4713
4714 new_stmt
4715 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4716
4717 if (!useless_type_conversion_p (vectype, rettype))
4718 {
4719 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4720 == TYPE_VECTOR_SUBPARTS (rettype));
4721 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
4722 op = make_ssa_name (var, new_stmt);
4723 gimple_call_set_lhs (new_stmt, op);
4724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4725 var = make_ssa_name (vec_dest, NULL);
4726 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4727 new_stmt
4728 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4729 NULL_TREE);
4730 }
4731 else
4732 {
4733 var = make_ssa_name (vec_dest, new_stmt);
4734 gimple_call_set_lhs (new_stmt, var);
4735 }
4736
4737 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4738
4739 if (modifier == NARROW)
4740 {
4741 if ((j & 1) == 0)
4742 {
4743 prev_res = var;
4744 continue;
4745 }
4746 var = permute_vec_elements (prev_res, var,
4747 perm_mask, stmt, gsi);
4748 new_stmt = SSA_NAME_DEF_STMT (var);
4749 }
4750
4751 if (prev_stmt_info == NULL)
4752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4753 else
4754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4755 prev_stmt_info = vinfo_for_stmt (new_stmt);
4756 }
4757 return true;
4758 }
7d75abc8
MM
4759 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4760 {
4761 gimple_stmt_iterator incr_gsi;
4762 bool insert_after;
4763 gimple incr;
4764 tree offvar;
7d75abc8
MM
4765 tree ivstep;
4766 tree running_off;
9771b263 4767 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 4768 gimple_seq stmts = NULL;
14ac6aa2
RB
4769 tree stride_base, stride_step, alias_off;
4770
4771 gcc_assert (!nested_in_vect_loop);
7d75abc8 4772
14ac6aa2
RB
4773 stride_base
4774 = fold_build_pointer_plus
4775 (unshare_expr (DR_BASE_ADDRESS (dr)),
4776 size_binop (PLUS_EXPR,
4777 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 4778 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 4779 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
4780
4781 /* For a load with loop-invariant (but other than power-of-2)
4782 stride (i.e. not a grouped access) like so:
4783
4784 for (i = 0; i < n; i += stride)
4785 ... = array[i];
4786
4787 we generate a new induction variable and new accesses to
4788 form a new vector (or vectors, depending on ncopies):
4789
4790 for (j = 0; ; j += VF*stride)
4791 tmp1 = array[j];
4792 tmp2 = array[j + stride];
4793 ...
4794 vectemp = {tmp1, tmp2, ...}
4795 */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
			    build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
	{
	  tree vec_inv;

	  vec_alloc (v, nunits);
	  for (i = 0; i < nunits; i++)
	    {
	      tree newref, newoff;
	      gimple incr;
	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
			       running_off, alias_off);

	      newref = force_gimple_operand_gsi (gsi, newref, true,
						 NULL_TREE, true,
						 GSI_SAME_STMT);
	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
	      newoff = copy_ssa_name (running_off, NULL);
	      incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
						   running_off, stride_step);
	      vect_finish_stmt_generation (stmt, incr, gsi);

	      running_off = newoff;
	    }

	  vec_inv = build_constructor (vectype, v);
	  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
	  new_stmt = SSA_NAME_DEF_STMT (new_temp);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      return true;
    }

  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
	  && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	    slp_perm = true;
	  group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
	}
      else
	{
	  vec_num = group_size;
	  group_gap = 0;
	}
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  x2 = &base + 2
        S2:  x0 = &base
        S3:  x1 = &base + 1
        S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

        VS1: vx0 = &base
        VS2: vx1 = &base + vec_size*1
        VS3: vx3 = &base + vec_size*2
        VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load ().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
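
  /* In the pseudo-code above, floor(P) denotes P rounded down to the
     nearest vector-size boundary, so both loads are aligned; the
     realign_load operation then extracts the desired unaligned value
     from the two aligned vectors MSQ and LSQ.  */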

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = SSA_NAME_DEF_STMT (msq);
	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
	}
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
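	  /* A "SIMD lane access" is an access to the compiler-generated
	     array that represents the per-lane private variables of an
	     OpenMP simd loop.  Such an access is known to be aligned and
	     in bounds off an invariant local base, so the DR base address
	     can be used directly with a constant offset (DATAREF_OFFSET)
	     instead of creating a pointer induction variable.  */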
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p);
	}
      else if (dataref_offset)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					  TYPE_SIZE_UNIT (aggr_type));
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    data_ref
		      = build2 (MEM_REF, vectype, dataref_ptr,
				dataref_offset
				? dataref_offset
				: build_int_cst (reference_alias_ptr_type
						 (DR_REF (first_dr)), 0));
		    align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			align = TYPE_ALIGN_UNIT (elem_type);
			misalign = 0;
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			misalign = DR_MISALIGNMENT (first_dr);
		      }
		    if (dataref_offset == NULL_TREE)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;
		    tree vs_minus_1;

		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    ptr = copy_ssa_name (dataref_ptr, NULL);
		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, ptr, dataref_ptr,
				  build_int_cst
				    (TREE_TYPE (dataref_ptr),
				     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs_minus_1,
				       TYPE_SIZE_UNIT (elem_type));
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, NULL_TREE, ptr,
				  build_int_cst
				    (TREE_TYPE (ptr),
				     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  new_temp = copy_ssa_name (dataref_ptr, NULL);
		  new_stmt = gimple_build_assign_with_ops
			       (BIT_AND_EXPR, new_temp, dataref_ptr,
				build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt
		    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
						    vec_dest, msq, lsq,
						    realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}

	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gimple_stmt_iterator gsi2 = *gsi;
		  gcc_assert (!grouped_load);
		  gsi_next (&gsi2);
		  new_temp = vect_init_vector (stmt, scalar_dest,
					       vectype, &gsi2);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      if (negative)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }
	  /* Bump the vector pointer to account for a gap.  */
	  if (slp && group_gap != 0)
	    {
	      tree bump = size_binop (MULT_EXPR,
				      TYPE_SIZE_UNIT (elem_type),
				      size_int (group_gap));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (!load_lanes_p)
		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
		     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
				 &lhs_def_stmt, &def, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
				 &rhs_def_stmt, &def, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  /* Is this a vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
			    &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be of signed integer type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;
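
  /* For example, when VECTYPE is V4SF the comparison is carried out in
     V4SI, the same-sized signed integer vector type, and the resulting
     mask selects between the then and else vectors lane by lane.  */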

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      stack_vec<tree, 4> ops;
	      stack_vec<vec<tree>, 4> vec_defs;

	      ops.safe_push (TREE_OPERAND (cond_expr, 0));
	      ops.safe_push (TREE_OPERAND (cond_expr, 1));
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
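	      /* vect_get_slp_defs fills VEC_DEFS in operand order, so the
		 def vectors are popped off in reverse: else clause, then
		 clause, and the two comparison operands.  */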
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();

	      ops.release ();
	      vec_defs.release ();
	    }
	  else
	    {
	      gimple gtemp;
	      vec_cond_lhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

	      vec_cond_rhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					      stmt, NULL);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt, NULL);
		  vect_is_simple_use (then_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt, NULL);
		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
				      NULL, &gtemp, &def, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_oprnds0.pop ());
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_oprnds1.pop ());
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_cond_rhs = vec_oprnds1[i];
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
				vec_cond_lhs, vec_cond_rhs);
	  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
				  vec_compare, vec_then_clause, vec_else_clause);

	  new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}


/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     The pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; don't analyze pattern stmts instead, as the pattern
     stmts will already be part of an SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze the def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		  dump_printf (MSG_NOTE, "\n");
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
		  || relevance == vect_used_in_outer_by_reduction
		  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
	  || vectorizable_shift (stmt, NULL, NULL, NULL)
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  return true;
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
					     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

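  /* The def type of a loop-header PHI (induction, reduction, ...) is
     only classified later, by vect_analyze_scalar_cycles, so start it
     out as unknown; everything else defaults to an internal def.  */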
  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create the vector holding stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector holding stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		free_stmt_vec_info (gsi_stmt (si));
	    }
	  free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;
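
  /* For example, a 4-byte float element with a preferred 16-byte SIMD
     mode yields a 4-element vector type, while requesting SIZE == 32
     instead looks up an 8-element mode of the same element type.  */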

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

ebfd146a
IR
6283/* Function vect_is_simple_use.
6284
6285 Input:
a70d6342
IR
6286 LOOP_VINFO - the vect info of the loop that is being vectorized.
6287 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6288 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
6289 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6290
6291 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6292 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6293 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6294 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* An empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
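
/* Illustrative sketch only (not part of GCC): roughly how a
   vectorizable_* analysis routine uses vect_is_simple_use to classify
   one operand of the statement it is checking.  The helper name and
   its parameters are assumptions made for this example.  */

static bool
example_classify_operand (tree operand, gimple stmt,
                          loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  /* Reject operands whose definition the vectorizer cannot handle.  */
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo,
                           &def_stmt, &def, &dt))
    return false;

  /* Constants and external defs get broadcast; internal defs are
     produced by another vectorized statement.  */
  return (dt == vect_constant_def
          || dt == vect_external_def
          || dt == vect_internal_def);
}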

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
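
/* Illustrative sketch only (not part of GCC): the _1 variant is the
   usual entry point when the caller also needs the operand's vector
   type.  A NULL *VECTYPE is not an error - it marks an invariant
   operand for which the caller picks the type itself.  The helper name
   and DEFAULT_VECTYPE parameter are assumptions of this sketch.  */

static bool
example_operand_vectype (tree operand, gimple stmt, tree default_vectype,
                         loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         tree *vectype)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (!vect_is_simple_use_1 (operand, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt, vectype))
    return false;

  /* For constant/external defs fall back to a type supplied by the
     caller.  */
  if (*vectype == NULL_TREE)
    *vectype = default_vectype;
  return true;
}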


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these
   operations are supported by the target platform either directly
   (via vector tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
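
/* Illustrative sketch only (not part of GCC): querying a char->int
   widening conversion, e.g. with a V16QI input type and a V4SI output
   type.  A target that widens one step at a time answers with
   *MULTI_STEP_CVT == 1 and the short vector type in INTERM_TYPES,
   i.e. the char->short->int chain described above.  The helper name
   and the STMT/type parameters are assumptions.  */

static bool
example_query_widening (gimple stmt, tree v4si_type, tree v16qi_type)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok;

  ok = supportable_widening_operation (NOP_EXPR, stmt, v4si_type,
                                       v16qi_type, &code1, &code2,
                                       &multi_step_cvt, &interm_types);
  /* On success CODE1/CODE2 (e.g. VEC_UNPACK_LO/HI_EXPR) are applied
     once per conversion step; the caller must release INTERM_TYPES.  */
  interm_types.release ();
  return ok;
}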


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the demotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}