/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
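
/* Illustrative example (not from the original sources): when an outer
   loop such as

     for (i = 0; i < n; i++)      <-- loop being vectorized
       for (j = 0; j < m; j++)    <-- loop->inner
         a[i][j] = 0;

   is vectorized, the statement "a[i][j] = 0" sits in a basic block
   whose loop_father is loop->inner, so stmt_in_inner_loop_p returns
   true for it, while statements of the outer loop itself yield
   false.  */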

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
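
/* Usage sketch (illustrative): when a cost vector is available the
   cost is queued for later finalization, otherwise it goes straight
   to the target's cost model.  A typical call, as used by the
   vect_model_*_cost routines below, is

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);  */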

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
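
/* Illustrative example (the SSA names are made up): for a group of
   two vectors, read_vector_array and write_vector_array emit
   statements of the form

     vect_x_1 = vect_array[0];
     vect_x_2 = vect_array[1];
     ...
     vect_array[0] = vect_y_1;

   i.e. plain ARRAY_REF loads and stores of the individual vectors in
   the group.  */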

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is outside of any pattern.  If LHS has other uses
             that are pattern uses, we should mark the stmt itself, and
             not the pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
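
/* Illustrative example: for the store "a[i_1] = x_2" the operand x_2
   is a non-indexing use, so the function returns true for it, whereas
   i_1 only feeds the address computation and yields false.  For the
   load "x_3 = a[i_1]" the lhs is an SSA_NAME, so every use is treated
   as an indexing use and the function returns false.  */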


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.");
              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.");

              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.");

              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              worklist.release ();
              return false;
            }
        }
    } /* while worklist */

  worklist.release ();
  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
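
/* Worked example (illustrative): for "x = y_1 + 3" vectorized with
   ncopies == 2, one operand is a constant, so one vector_stmt is
   recorded in the prologue (building the constant vector) and two
   vector_stmt costs are recorded in the loop body.  */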


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
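
/* Worked example (illustrative): for a two-step promotion (PWR == 1)
   the loop above counts vect_pow2 (1) + vect_pow2 (2) == 2 + 4 == 6
   vec_promote_demote operations, while a two-step demotion counts
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2 == 3, reflecting that each
   promotion step doubles the number of result vectors.  */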

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and a low interleave operation for each needed
         permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
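
/* Worked example (illustrative): for a group of 4 interleaved stores
   with ncopies == 1 and no store-lanes support, the permute count is
   1 * log2 (4) * 4 == 8 vec_perm operations, added on top of the
   store costs recorded by vect_get_store_cost.  */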


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
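
/* Usage sketch (illustrative; the type variable name is made up): a
   call such as

     vect_init_vector (stmt, build_int_cst (intSI_type_node, 5),
                       v4si_type, NULL);

   emits a statement like "cst_1 = { 5, 5, 5, 5 };" in the loop
   preheader (GSI being NULL) and returns the new SSA name.  */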


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, " def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<slp_void_p> vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
      if (op1)
        *vec_oprnds1 = *((vec<tree> *) vec_defs[1]);

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
1619
1620
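/* Usage sketch (hypothetical variables): for a binary stmt  z = x + y
   vectorized outside of SLP, the call
     vect_get_vec_defs (x, y, stmt, &vec_oprnds0, &vec_oprnds1, NULL, -1);
   leaves exactly one vector def in each output vector, both obtained via
   vect_get_vec_def_for_operand; with a non-NULL SLP_NODE the defs come
   from vect_get_slp_defs instead, one per SLP vector stmt.  */
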
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

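/* Virtual SSA sketch (hypothetical SSA names): when VEC_STMT is a vector
   store inserted before an existing stmt S that carried  VUSE <.MEM_5>
   and  VDEF <.MEM_6>, the code above gives
     VEC_STMT:  VUSE <.MEM_5>, VDEF <.MEM_7>
     S:         VUSE <.MEM_7>, VDEF <.MEM_6>
   so the virtual use-def chain stays well-formed without invoking the
   SSA renamer.  */
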
/* Checks if CALL can be vectorized in types VECTYPE_OUT and VECTYPE_IN.
   Returns a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

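/* For example (target-dependent, shown only as a sketch): given
     y = __builtin_sqrt (x)
   with V2DF as both VECTYPE_OUT and VECTYPE_IN, the hook may hand back
   the decl of a vector sqrt builtin, so the call is vectorized as
     vy = vector_sqrt_builtin (vx);
   where 'vector_sqrt_builtin' stands for whatever decl the target's
   builtin_vectorized_function hook returns.  */
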
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vec<tree>();
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not
     have interesting builtin functions to vectorize with more than two
     arguments except for fma.  Zero arguments is not good either.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs, use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

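  /* For example (hypothetical types): a call taking V4DF arguments and
     producing a V8SI result has nunits_in == 4 and nunits_out == 8,
     hence modifier == NARROW, and each vectorized call will consume two
     vector defs per scalar argument.  */
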
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<slp_void_p> vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = *((vec<tree> *) vec_defs[0]);

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = *((vec<tree> *) vec_defs[k]);
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = *((vec<tree> *) vec_defs[i]);
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<slp_void_p> vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = *((vec<tree> *) vec_defs[0]);

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = *((vec<tree> *) vec_defs[k]);
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = *((vec<tree> *) vec_defs[i]);
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}

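/* NARROW sketch (hypothetical types): with VF == 8, V4DF inputs and a
   V8SI result, ncopies == VF / nunits_out == 1, and the single
   vectorized call receives two vector defs per scalar argument:
     vsi.0 = narrowing_builtin (vdf.0, vdf.1);
   where 'narrowing_builtin' stands in for the target decl returned by
   vectorizable_function.  */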

/* Function vect_gen_widened_results_half

   Create a vector stmt whose code is CODE, whose number of arguments is
   OP_TYPE, and whose result variable is VEC_DEST; its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}

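/* For instance (generic, non-call case, illustrative tree codes): a
   widening multiply is produced by calling this function twice, once per
   half of the result:
     vw.lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;   (with CODE1)
     vw.hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;   (with CODE2)
   the caller supplies the actual CODE for each half.  */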

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (which is the scalar
     operand) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}

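/* For example (hypothetical names), a call with MULTI_STEP_CVT == 1
   collects four vector defs into VEC_OPRNDS:
     { vx.0, vx.1, vx.2, vx.3 }
   where each vx.j for j > 0 is the stmt-copy def of vx.j-1; in general
   the caller passes vect_pow2 (steps) - 1 and gets 2 * vect_pow2 (steps)
   defs back.  */
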
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}

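/* Two-step demotion sketch (illustrative, e.g. int -> short -> char):
   the first level demotes pairs of SI vectors with the caller-supplied
   CODE, and the recursive call packs those results further:
     vh.0 = VEC_PACK_TRUNC_EXPR <vi.0, vi.1>;
     vh.1 = VEC_PACK_TRUNC_EXPR <vi.2, vi.3>;
     vc.0 = VEC_PACK_TRUNC_EXPR <vh.0, vh.1>;  */
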
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vec<tree>();

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->truncate (0);
  *vec_oprnds0 = vec_tmp;
}

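/* Promotion sketch (illustrative): each input vector contributes two
   widened halves that replace it in VEC_OPRNDS0 for the next step:
     vw.0 = VEC_UNPACK_LO_EXPR <vx.0>;
     vw.1 = VEC_UNPACK_HI_EXPR <vx.0>;
   so the vector count doubles at every level of a multi-step widening.  */
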
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vec<tree>();
  vec<tree> vec_oprnds1 = vec<tree>();
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vec<tree>();
  vec<tree> interm_types = vec<tree>();
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

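  /* Multi-step WIDEN sketch (illustrative): a  vector int -> vector double
     FLOAT_EXPR that the target cannot do directly may be decomposed by the
     loop above as
       int  --(NOP_EXPR widening: CODE1/CODE2)-->  long long
            --(FLOAT_EXPR: CODECVT1)-->            double
     using the intermediate integer type built for RHS_MODE.  */
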
  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == NONE)
        vec_oprnds0.create (1);
      else if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt,
                                         &vec_oprnds0, NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}

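/* NARROW sketch (hypothetical types): a  vector double -> vector char
   FIX_TRUNC_EXPR may first convert each V2DF def to V2DI (CODECVT1),
   then demote pairwise via vect_create_vectorized_demotion_stmts down
   to the V16QI result, consuming 2 * vect_pow2 (multi_step_cvt) input
   defs per copy.  */
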
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vec<tree>();
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}

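/* Usage sketch: a caller can ask, e.g.,
     vect_supportable_shift (LSHIFT_EXPR, intSI_type_node)
   to learn whether the target can shift a vector of SImode elements,
   either uniformly by a scalar amount (optab_scalar) or element-wise
   by a vector of amounts (optab_vector).  */
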
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vec<tree>();
  vec<tree> vec_oprnds1 = vec<tree>();
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type ");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same;
         in loops, if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.");
      return false;
    }

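  /* For example (hypothetical stmts):  x << 3  or  x << n  with
     loop-invariant n gives dt[1] == vect_constant_def or
     vect_external_def, so SCALAR_SHIFT_ARG stays true and the
     vector/scalar optab is tried first;  x << y[i]  gives
     dt[1] == vect_internal_def, which requires the vector/vector
     optab.  */
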
9dc3f7de 3211 /* Vector shifted by vector. */
49eab32e 3212 if (!scalar_shift_arg)
9dc3f7de
IR
3213 {
3214 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 3215 if (dump_enabled_p ())
78c60e3d
SS
3216 dump_printf_loc (MSG_NOTE, vect_location,
3217 "vector/vector shift/rotate found.");
3218
aa948027
JJ
3219 if (!op1_vectype)
3220 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3221 if (op1_vectype == NULL_TREE
3222 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 3223 {
73fbfcad 3224 if (dump_enabled_p ())
78c60e3d
SS
3225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3226 "unusable type for last operand in"
3227 " vector/vector shift/rotate.");
cede2577
JJ
3228 return false;
3229 }
9dc3f7de
IR
3230 }
3231 /* See if the machine has a vector shifted by scalar insn and if not
3232 then see if it has a vector shifted by vector insn. */
49eab32e 3233 else
9dc3f7de
IR
3234 {
3235 optab = optab_for_tree_code (code, vectype, optab_scalar);
3236 if (optab
3237 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3238 {
73fbfcad 3239 if (dump_enabled_p ())
78c60e3d
SS
3240 dump_printf_loc (MSG_NOTE, vect_location,
3241 "vector/scalar shift/rotate found.");
9dc3f7de
IR
3242 }
3243 else
3244 {
3245 optab = optab_for_tree_code (code, vectype, optab_vector);
3246 if (optab
3247 && (optab_handler (optab, TYPE_MODE (vectype))
3248 != CODE_FOR_nothing))
3249 {
3250 scalar_shift_arg = false;
3251
73fbfcad 3252 if (dump_enabled_p ())
3253 dump_printf_loc (MSG_NOTE, vect_location,
3254 "vector/vector shift/rotate found.");
3255
3256 /* Unlike the other binary operators, shifts/rotates have
3257 the rhs being int, instead of the same type as the lhs,
3258 so make sure the scalar is the right type if we are
aa948027 3259 dealing with vectors of long long/long/short/char. */
3260 if (dt[1] == vect_constant_def)
3261 op1 = fold_convert (TREE_TYPE (vectype), op1);
3262 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3263 TREE_TYPE (op1)))
3264 {
3265 if (slp_node
3266 && TYPE_MODE (TREE_TYPE (vectype))
3267 != TYPE_MODE (TREE_TYPE (op1)))
3268 {
73fbfcad 3269 if (dump_enabled_p ())
3270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3271 "unusable type for last operand in"
3272 " vector/vector shift/rotate.");
3273 return false;
3274 }
3275 if (vec_stmt && !slp_node)
3276 {
3277 op1 = fold_convert (TREE_TYPE (vectype), op1);
3278 op1 = vect_init_vector (stmt, op1,
3279 TREE_TYPE (vectype), NULL);
3280 }
3281 }
3282 }
3283 }
3284 }
3285
3286 /* Supportable by target? */
3287 if (!optab)
3288 {
73fbfcad 3289 if (dump_enabled_p ())
3290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3291 "no optab.");
3292 return false;
3293 }
3294 vec_mode = TYPE_MODE (vectype);
3295 icode = (int) optab_handler (optab, vec_mode);
3296 if (icode == CODE_FOR_nothing)
3297 {
73fbfcad 3298 if (dump_enabled_p ())
3299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3300 "op not supported by target.");
3301 /* Check only during analysis. */
3302 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3303 || (vf < vect_min_worthwhile_factor (code)
3304 && !vec_stmt))
3305 return false;
73fbfcad 3306 if (dump_enabled_p ())
78c60e3d 3307 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3308 }
3309
3310 /* Worthwhile without SIMD support? Check only during analysis. */
3311 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3312 && vf < vect_min_worthwhile_factor (code)
3313 && !vec_stmt)
3314 {
73fbfcad 3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 "not worthwhile without SIMD support.");
3318 return false;
3319 }
3320
3321 if (!vec_stmt) /* transformation not required. */
3322 {
3323 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 3324 if (dump_enabled_p ())
78c60e3d 3325 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
c3e7ee41 3326 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3327 return true;
3328 }
3329
3330 /** Transform. **/
3331
73fbfcad 3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_NOTE, vect_location,
3334 "transform binary/unary operation.");
3335
3336 /* Handle def. */
3337 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3338
3339 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3340 created in the previous stages of the recursion, so no allocation is
3341 needed, except for the case of shift with scalar shift argument. In that
3342 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3343 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3344 In case of loop-based vectorization we allocate VECs of size 1. We
3345 allocate VEC_OPRNDS1 only in case of binary operation. */
3346 if (!slp_node)
3347 {
3348 vec_oprnds0.create (1);
3349 vec_oprnds1.create (1);
3350 }
3351 else if (scalar_shift_arg)
9771b263 3352 vec_oprnds1.create (slp_node->vec_stmts_size);
3353
3354 prev_stmt_info = NULL;
3355 for (j = 0; j < ncopies; j++)
3356 {
3357 /* Handle uses. */
3358 if (j == 0)
3359 {
3360 if (scalar_shift_arg)
3361 {
3362 /* Vector shl and shr insn patterns can be defined with scalar
3363 operand 2 (shift operand). In this case, use constant or loop
3364 invariant op1 directly, without extending it to vector mode
3365 first. */
3366 optab_op2_mode = insn_data[icode].operand[2].mode;
3367 if (!VECTOR_MODE_P (optab_op2_mode))
3368 {
73fbfcad 3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_NOTE, vect_location,
3371 "operand 1 using scalar mode.");
9dc3f7de 3372 vec_oprnd1 = op1;
9771b263 3373 vec_oprnds1.quick_push (vec_oprnd1);
3374 if (slp_node)
3375 {
3376 /* Store vec_oprnd1 for every vector stmt to be created
3377 for SLP_NODE. We check during the analysis that all
3378 the shift arguments are the same.
3379 TODO: Allow different constants for different vector
3380 stmts generated for an SLP instance. */
3381 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 3382 vec_oprnds1.quick_push (vec_oprnd1);
3383 }
3384 }
3385 }
3386
3387 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3388 (a special case for certain kind of vector shifts); otherwise,
3389 operand 1 should be of a vector type (the usual case). */
3390 if (vec_oprnd1)
3391 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 3392 slp_node, -1);
3393 else
3394 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 3395 slp_node, -1);
3396 }
3397 else
3398 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3399
3400 /* Arguments are ready. Create the new vector stmt. */
9771b263 3401 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 3402 {
9771b263 3403 vop1 = vec_oprnds1[i];
3404 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3405 new_temp = make_ssa_name (vec_dest, new_stmt);
3406 gimple_assign_set_lhs (new_stmt, new_temp);
3407 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3408 if (slp_node)
9771b263 3409 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3410 }
3411
3412 if (slp_node)
3413 continue;
3414
3415 if (j == 0)
3416 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3417 else
3418 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3419 prev_stmt_info = vinfo_for_stmt (new_stmt);
3420 }
3421
3422 vec_oprnds0.release ();
3423 vec_oprnds1.release ();
3424
3425 return true;
3426}
3427
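/* Illustrative sketch only (not part of the vectorizer): the two
   shift flavours vectorizable_shift distinguishes above, modelled on
   plain arrays.  The 4-lane width and the helper name are assumptions
   made for the example.  With a vector/scalar shift (optab_scalar)
   every lane is shifted by one invariant amount; with a vector/vector
   shift (optab_vector) each lane uses the corresponding lane of the
   shift operand.  */

static void
example_shift_flavours (const int a[4], const int amounts[4],
                        int out_scalar[4], int out_vector[4])
{
  int i;

  /* vector/scalar shift: one invariant shift amount for all lanes.  */
  for (i = 0; i < 4; i++)
    out_scalar[i] = a[i] << amounts[0];

  /* vector/vector shift: per-lane shift amounts.  */
  for (i = 0; i < 4; i++)
    out_vector[i] = a[i] << amounts[i];
}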
3428
3429static tree permute_vec_elements (tree, tree, tree, gimple,
3430 gimple_stmt_iterator *);
3431
3432
3433/* Function vectorizable_operation.
3434
3435 Check if STMT performs a binary, unary or ternary operation that can
3436 be vectorized.
b8698a0f 3437 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3438 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3439 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3440
3441static bool
3442vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3443 gimple *vec_stmt, slp_tree slp_node)
3444{
00f07b86 3445 tree vec_dest;
ebfd146a 3446 tree scalar_dest;
16949072 3447 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 3448 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 3449 tree vectype;
3450 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3451 enum tree_code code;
3452 enum machine_mode vec_mode;
3453 tree new_temp;
3454 int op_type;
00f07b86 3455 optab optab;
ebfd146a 3456 int icode;
3457 tree def;
3458 gimple def_stmt;
3459 enum vect_def_type dt[3]
3460 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3461 gimple new_stmt = NULL;
3462 stmt_vec_info prev_stmt_info;
b690cc0f 3463 int nunits_in;
3464 int nunits_out;
3465 tree vectype_out;
3466 int ncopies;
3467 int j, i;
3468 vec<tree> vec_oprnds0 = vec<tree>();
3469 vec<tree> vec_oprnds1 = vec<tree>();
3470 vec<tree> vec_oprnds2 = vec<tree>();
16949072 3471 tree vop0, vop1, vop2;
3472 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3473 int vf;
3474
a70d6342 3475 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3476 return false;
3477
8644a673 3478 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3479 return false;
3480
3481 /* Is STMT a vectorizable binary/unary operation? */
3482 if (!is_gimple_assign (stmt))
3483 return false;
3484
3485 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3486 return false;
3487
3488 code = gimple_assign_rhs_code (stmt);
3489
3490 /* For pointer addition, we should use the normal plus for
3491 the vector addition. */
3492 if (code == POINTER_PLUS_EXPR)
3493 code = PLUS_EXPR;
3494
3495 /* Support only unary or binary operations. */
3496 op_type = TREE_CODE_LENGTH (code);
16949072 3497 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 3498 {
73fbfcad 3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3501 "num. args = %d (not unary/binary/ternary op).",
3502 op_type);
3503 return false;
3504 }
3505
3506 scalar_dest = gimple_assign_lhs (stmt);
3507 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3508
3509 /* Most operations cannot handle bit-precision types without extra
3510 truncations. */
3511 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3512 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3513 /* Exception are bitwise binary operations. */
3514 && code != BIT_IOR_EXPR
3515 && code != BIT_XOR_EXPR
3516 && code != BIT_AND_EXPR)
3517 {
73fbfcad 3518 if (dump_enabled_p ())
3519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3520 "bit-precision arithmetic not supported.");
3521 return false;
3522 }
3523
ebfd146a 3524 op0 = gimple_assign_rhs1 (stmt);
24ee1384 3525 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b690cc0f 3526 &def_stmt, &def, &dt[0], &vectype))
ebfd146a 3527 {
73fbfcad 3528 if (dump_enabled_p ())
3529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3530 "use not simple.");
3531 return false;
3532 }
3533 /* If op0 is an external or constant def use a vector type with
3534 the same size as the output vector type. */
3535 if (!vectype)
3536 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3537 if (vec_stmt)
3538 gcc_assert (vectype);
3539 if (!vectype)
3540 {
73fbfcad 3541 if (dump_enabled_p ())
7d8930a0 3542 {
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "no vectype for scalar type ");
3545 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3546 TREE_TYPE (op0));
3547 }
3548
3549 return false;
3550 }
3551
3552 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3553 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3554 if (nunits_out != nunits_in)
3555 return false;
ebfd146a 3556
16949072 3557 if (op_type == binary_op || op_type == ternary_op)
3558 {
3559 op1 = gimple_assign_rhs2 (stmt);
3560 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3561 &def, &dt[1]))
ebfd146a 3562 {
73fbfcad 3563 if (dump_enabled_p ())
3564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3565 "use not simple.");
3566 return false;
3567 }
3568 }
3569 if (op_type == ternary_op)
3570 {
3571 op2 = gimple_assign_rhs3 (stmt);
3572 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3573 &def, &dt[2]))
16949072 3574 {
73fbfcad 3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3577 "use not simple.");
3578 return false;
3579 }
3580 }
ebfd146a 3581
3582 if (loop_vinfo)
3583 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3584 else
3585 vf = 1;
3586
3587 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3588 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 3589 case of SLP. */
437f4a00 3590 if (slp_node || PURE_SLP_STMT (stmt_info))
3591 ncopies = 1;
3592 else
3593 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3594
3595 gcc_assert (ncopies >= 1);
3596
9dc3f7de 3597 /* Shifts are handled in vectorizable_shift (). */
3598 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3599 || code == RROTATE_EXPR)
9dc3f7de 3600 return false;
ebfd146a 3601
ebfd146a 3602 /* Supportable by target? */
3603
3604 vec_mode = TYPE_MODE (vectype);
3605 if (code == MULT_HIGHPART_EXPR)
ebfd146a 3606 {
00f07b86 3607 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
dee54b6e 3608 icode = LAST_INSN_CODE;
3609 else
3610 icode = CODE_FOR_nothing;
ebfd146a 3611 }
3612 else
3613 {
3614 optab = optab_for_tree_code (code, vectype, optab_default);
3615 if (!optab)
5deb57cb 3616 {
73fbfcad 3617 if (dump_enabled_p ())
3618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3619 "no optab.");
00f07b86 3620 return false;
5deb57cb 3621 }
00f07b86 3622 icode = (int) optab_handler (optab, vec_mode);
3623 }
3624
3625 if (icode == CODE_FOR_nothing)
3626 {
73fbfcad 3627 if (dump_enabled_p ())
3628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3629 "op not supported by target.");
3630 /* Check only during analysis. */
3631 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 3632 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 3633 return false;
73fbfcad 3634 if (dump_enabled_p ())
78c60e3d 3635 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3636 }
3637
4a00c761 3638 /* Worthwhile without SIMD support? Check only during analysis. */
3639 if (!VECTOR_MODE_P (vec_mode)
3640 && !vec_stmt
3641 && vf < vect_min_worthwhile_factor (code))
7d8930a0 3642 {
73fbfcad 3643 if (dump_enabled_p ())
3644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3645 "not worthwhile without SIMD support.");
e34842c6 3646 return false;
7d8930a0 3647 }
ebfd146a 3648
3649 if (!vec_stmt) /* transformation not required. */
3650 {
4a00c761 3651 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 3652 if (dump_enabled_p ())
3653 dump_printf_loc (MSG_NOTE, vect_location,
3654 "=== vectorizable_operation ===");
c3e7ee41 3655 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3656 return true;
3657 }
3658
3659 /** Transform. **/
3660
73fbfcad 3661 if (dump_enabled_p ())
3662 dump_printf_loc (MSG_NOTE, vect_location,
3663 "transform binary/unary operation.");
383d9c83 3664
ebfd146a 3665 /* Handle def. */
00f07b86 3666 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 3667
3668 /* In case the vectorization factor (VF) is bigger than the number
3669 of elements that we can fit in a vectype (nunits), we have to generate
 3670 more than one vector stmt, i.e., we need to "unroll" the
3671 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3672 from one copy of the vector stmt to the next, in the field
3673 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3674 stages to find the correct vector defs to be used when vectorizing
3675 stmts that use the defs of the current stmt. The example below
3676 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3677 we need to create 4 vectorized stmts):
3678
3679 before vectorization:
3680 RELATED_STMT VEC_STMT
3681 S1: x = memref - -
3682 S2: z = x + 1 - -
3683
3684 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3685 there):
3686 RELATED_STMT VEC_STMT
3687 VS1_0: vx0 = memref0 VS1_1 -
3688 VS1_1: vx1 = memref1 VS1_2 -
3689 VS1_2: vx2 = memref2 VS1_3 -
3690 VS1_3: vx3 = memref3 - -
3691 S1: x = load - VS1_0
3692 S2: z = x + 1 - -
3693
3694 step2: vectorize stmt S2 (done here):
3695 To vectorize stmt S2 we first need to find the relevant vector
3696 def for the first operand 'x'. This is, as usual, obtained from
3697 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3698 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3699 relevant vector def 'vx0'. Having found 'vx0' we can generate
3700 the vector stmt VS2_0, and as usual, record it in the
3701 STMT_VINFO_VEC_STMT of stmt S2.
3702 When creating the second copy (VS2_1), we obtain the relevant vector
3703 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3704 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3705 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3706 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3707 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3708 chain of stmts and pointers:
3709 RELATED_STMT VEC_STMT
3710 VS1_0: vx0 = memref0 VS1_1 -
3711 VS1_1: vx1 = memref1 VS1_2 -
3712 VS1_2: vx2 = memref2 VS1_3 -
3713 VS1_3: vx3 = memref3 - -
3714 S1: x = load - VS1_0
3715 VS2_0: vz0 = vx0 + v1 VS2_1 -
3716 VS2_1: vz1 = vx1 + v1 VS2_2 -
3717 VS2_2: vz2 = vx2 + v1 VS2_3 -
3718 VS2_3: vz3 = vx3 + v1 - -
3719 S2: z = x + 1 - VS2_0 */
3720
3721 prev_stmt_info = NULL;
3722 for (j = 0; j < ncopies; j++)
3723 {
3724 /* Handle uses. */
3725 if (j == 0)
3726 {
3727 if (op_type == binary_op || op_type == ternary_op)
3728 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3729 slp_node, -1);
3730 else
3731 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3732 slp_node, -1);
3733 if (op_type == ternary_op)
36ba4aae 3734 {
3735 vec_oprnds2.create (1);
3736 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3737 stmt,
3738 NULL));
36ba4aae 3739 }
4a00c761 3740 }
ebfd146a 3741 else
3742 {
3743 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3744 if (op_type == ternary_op)
3745 {
3746 tree vec_oprnd = vec_oprnds2.pop ();
3747 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3748 vec_oprnd));
3749 }
3750 }
3751
3752 /* Arguments are ready. Create the new vector stmt. */
9771b263 3753 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 3754 {
4a00c761 3755 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 3756 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 3757 vop2 = ((op_type == ternary_op)
9771b263 3758 ? vec_oprnds2[i] : NULL_TREE);
3759 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3760 vop0, vop1, vop2);
3761 new_temp = make_ssa_name (vec_dest, new_stmt);
3762 gimple_assign_set_lhs (new_stmt, new_temp);
3763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3764 if (slp_node)
9771b263 3765 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3766 }
3767
3768 if (slp_node)
3769 continue;
3770
3771 if (j == 0)
3772 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3773 else
3774 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3775 prev_stmt_info = vinfo_for_stmt (new_stmt);
3776 }
3777
3778 vec_oprnds0.release ();
3779 vec_oprnds1.release ();
3780 vec_oprnds2.release ();
ebfd146a 3781
3782 return true;
3783}
3784
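/* Illustrative sketch only (not part of the vectorizer): the
   per-element semantics of MULT_HIGHPART_EXPR, which
   vectorizable_operation above routes through can_mult_highpart_p --
   the upper half of the double-width product.  Shown for 32-bit
   elements; the 64-bit intermediate type and the helper name are
   assumptions of the example.  */

static int
example_mult_highpart (int x, int y)
{
  long long prod = (long long) x * (long long) y;
  return (int) (prod >> 32);
}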
3785
3786/* Function vectorizable_store.
3787
 3788 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3789 can be vectorized.
3790 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3791 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3792 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3793
3794static bool
3795vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3796 slp_tree slp_node)
3797{
3798 tree scalar_dest;
3799 tree data_ref;
3800 tree op;
3801 tree vec_oprnd = NULL_TREE;
3802 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3803 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3804 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 3805 tree elem_type;
ebfd146a 3806 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3807 struct loop *loop = NULL;
3808 enum machine_mode vec_mode;
3809 tree dummy;
3810 enum dr_alignment_support alignment_support_scheme;
3811 tree def;
3812 gimple def_stmt;
3813 enum vect_def_type dt;
3814 stmt_vec_info prev_stmt_info = NULL;
3815 tree dataref_ptr = NULL_TREE;
3816 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3817 int ncopies;
3818 int j;
3819 gimple next_stmt, first_stmt = NULL;
0d0293ac 3820 bool grouped_store = false;
272c6793 3821 bool store_lanes_p = false;
ebfd146a 3822 unsigned int group_size, i;
3823 vec<tree> dr_chain = vec<tree>();
3824 vec<tree> oprnds = vec<tree>();
3825 vec<tree> result_chain = vec<tree>();
ebfd146a 3826 bool inv_p;
9771b263 3827 vec<tree> vec_oprnds = vec<tree>();
ebfd146a 3828 bool slp = (slp_node != NULL);
ebfd146a 3829 unsigned int vec_num;
a70d6342 3830 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 3831 tree aggr_type;
3832
3833 if (loop_vinfo)
3834 loop = LOOP_VINFO_LOOP (loop_vinfo);
3835
3836 /* Multiple types in SLP are handled by creating the appropriate number of
3837 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3838 case of SLP. */
437f4a00 3839 if (slp || PURE_SLP_STMT (stmt_info))
3840 ncopies = 1;
3841 else
3842 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3843
3844 gcc_assert (ncopies >= 1);
3845
3846 /* FORNOW. This restriction should be relaxed. */
a70d6342 3847 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 3848 {
73fbfcad 3849 if (dump_enabled_p ())
3850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3851 "multiple types in nested loop.");
3852 return false;
3853 }
3854
a70d6342 3855 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3856 return false;
3857
8644a673 3858 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3859 return false;
3860
3861 /* Is vectorizable store? */
3862
3863 if (!is_gimple_assign (stmt))
3864 return false;
3865
3866 scalar_dest = gimple_assign_lhs (stmt);
3867 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3868 && is_pattern_stmt_p (stmt_info))
3869 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3870 if (TREE_CODE (scalar_dest) != ARRAY_REF
3871 && TREE_CODE (scalar_dest) != INDIRECT_REF
3872 && TREE_CODE (scalar_dest) != COMPONENT_REF
3873 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3874 && TREE_CODE (scalar_dest) != REALPART_EXPR
3875 && TREE_CODE (scalar_dest) != MEM_REF)
3876 return false;
3877
3878 gcc_assert (gimple_assign_single_p (stmt));
3879 op = gimple_assign_rhs1 (stmt);
3880 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3881 &def, &dt))
ebfd146a 3882 {
73fbfcad 3883 if (dump_enabled_p ())
3884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3885 "use not simple.");
3886 return false;
3887 }
3888
272c6793 3889 elem_type = TREE_TYPE (vectype);
ebfd146a 3890 vec_mode = TYPE_MODE (vectype);
7b7b1813 3891
3892 /* FORNOW. In some cases can vectorize even if data-type not supported
3893 (e.g. - array initialization with 0). */
947131ba 3894 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3895 return false;
3896
3897 if (!STMT_VINFO_DATA_REF (stmt_info))
3898 return false;
3899
3900 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3901 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3902 size_zero_node) < 0)
a1e53f3f 3903 {
73fbfcad 3904 if (dump_enabled_p ())
3905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3906 "negative step for store.");
3907 return false;
3908 }
3909
0d0293ac 3910 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 3911 {
0d0293ac 3912 grouped_store = true;
e14c1050 3913 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3914 if (!slp && !PURE_SLP_STMT (stmt_info))
3915 {
e14c1050 3916 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3917 if (vect_store_lanes_supported (vectype, group_size))
3918 store_lanes_p = true;
0d0293ac 3919 else if (!vect_grouped_store_supported (vectype, group_size))
3920 return false;
3921 }
b8698a0f 3922
3923 if (first_stmt == stmt)
3924 {
3925 /* STMT is the leader of the group. Check the operands of all the
3926 stmts of the group. */
e14c1050 3927 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3928 while (next_stmt)
3929 {
3930 gcc_assert (gimple_assign_single_p (next_stmt));
3931 op = gimple_assign_rhs1 (next_stmt);
3932 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3933 &def_stmt, &def, &dt))
ebfd146a 3934 {
73fbfcad 3935 if (dump_enabled_p ())
3936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3937 "use not simple.");
3938 return false;
3939 }
e14c1050 3940 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3941 }
3942 }
3943 }
3944
3945 if (!vec_stmt) /* transformation not required. */
3946 {
3947 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3948 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3949 NULL, NULL, NULL);
3950 return true;
3951 }
3952
3953 /** Transform. **/
3954
0d0293ac 3955 if (grouped_store)
3956 {
3957 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 3958 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 3959
e14c1050 3960 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3961
3962 /* FORNOW */
a70d6342 3963 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3964
3965 /* We vectorize all the stmts of the interleaving group when we
3966 reach the last stmt in the group. */
3967 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3968 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3969 && !slp)
3970 {
3971 *vec_stmt = NULL;
3972 return true;
3973 }
3974
3975 if (slp)
4b5caab7 3976 {
0d0293ac 3977 grouped_store = false;
3978 /* VEC_NUM is the number of vect stmts to be created for this
3979 group. */
3980 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 3981 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 3982 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 3983 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 3984 }
ebfd146a 3985 else
3986 /* VEC_NUM is the number of vect stmts to be created for this
3987 group. */
3988 vec_num = group_size;
3989 }
b8698a0f 3990 else
3991 {
3992 first_stmt = stmt;
3993 first_dr = dr;
3994 group_size = vec_num = 1;
ebfd146a 3995 }
b8698a0f 3996
73fbfcad 3997 if (dump_enabled_p ())
3998 dump_printf_loc (MSG_NOTE, vect_location,
3999 "transform store. ncopies = %d", ncopies);
ebfd146a 4000
4001 dr_chain.create (group_size);
4002 oprnds.create (group_size);
ebfd146a 4003
720f5239 4004 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4005 gcc_assert (alignment_support_scheme);
4006 /* Targets with store-lane instructions must not require explicit
4007 realignment. */
4008 gcc_assert (!store_lanes_p
4009 || alignment_support_scheme == dr_aligned
4010 || alignment_support_scheme == dr_unaligned_supported);
4011
4012 if (store_lanes_p)
4013 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4014 else
4015 aggr_type = vectype;
4016
4017 /* In case the vectorization factor (VF) is bigger than the number
4018 of elements that we can fit in a vectype (nunits), we have to generate
 4019 more than one vector stmt, i.e., we need to "unroll" the
b8698a0f 4020 vector stmt by a factor VF/nunits. For more details see documentation in
4021 vect_get_vec_def_for_copy_stmt. */
4022
0d0293ac 4023 /* In case of interleaving (non-unit grouped access):
4024
4025 S1: &base + 2 = x2
4026 S2: &base = x0
4027 S3: &base + 1 = x1
4028 S4: &base + 3 = x3
4029
4030 We create vectorized stores starting from base address (the access of the
4031 first stmt in the chain (S2 in the above example), when the last store stmt
4032 of the chain (S4) is reached:
4033
4034 VS1: &base = vx2
4035 VS2: &base + vec_size*1 = vx0
4036 VS3: &base + vec_size*2 = vx1
4037 VS4: &base + vec_size*3 = vx3
4038
4039 Then permutation statements are generated:
4040
4041 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4042 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 4043 ...
b8698a0f 4044
4045 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4046 (the order of the data-refs in the output of vect_permute_store_chain
4047 corresponds to the order of scalar stmts in the interleaving chain - see
4048 the documentation of vect_permute_store_chain()).
4049
4050 In case of both multiple types and interleaving, above vector stores and
ff802fa1 4051 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 4052 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 4053 STMT_VINFO_RELATED_STMT for the next copies.
4054 */
4055
4056 prev_stmt_info = NULL;
4057 for (j = 0; j < ncopies; j++)
4058 {
4059 gimple new_stmt;
4060 gimple ptr_incr;
4061
4062 if (j == 0)
4063 {
4064 if (slp)
4065 {
4066 /* Get vectorized arguments for SLP_NODE. */
4067 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4068 NULL, slp_node, -1);
ebfd146a 4069
9771b263 4070 vec_oprnd = vec_oprnds[0];
4071 }
4072 else
4073 {
4074 /* For interleaved stores we collect vectorized defs for all the
4075 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4076 used as an input to vect_permute_store_chain(), and OPRNDS as
4077 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4078
0d0293ac 4079 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 4080 OPRNDS are of size 1. */
b8698a0f 4081 next_stmt = first_stmt;
4082 for (i = 0; i < group_size; i++)
4083 {
4084 /* Since gaps are not supported for interleaved stores,
4085 GROUP_SIZE is the exact number of stmts in the chain.
4086 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4087 there is no interleaving, GROUP_SIZE is 1, and only one
4088 iteration of the loop will be executed. */
4089 gcc_assert (next_stmt
4090 && gimple_assign_single_p (next_stmt));
4091 op = gimple_assign_rhs1 (next_stmt);
4092
b8698a0f 4093 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 4094 NULL);
4095 dr_chain.quick_push (vec_oprnd);
4096 oprnds.quick_push (vec_oprnd);
e14c1050 4097 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4098 }
4099 }
4100
 4101 /* We should have caught mismatched types earlier. */
4102 gcc_assert (useless_type_conversion_p (vectype,
4103 TREE_TYPE (vec_oprnd)));
272c6793 4104 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4105 NULL_TREE, &dummy, gsi,
4106 &ptr_incr, false, &inv_p);
a70d6342 4107 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 4108 }
b8698a0f 4109 else
ebfd146a 4110 {
4111 /* For interleaved stores we created vectorized defs for all the
4112 defs stored in OPRNDS in the previous iteration (previous copy).
4113 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4114 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4115 next copy.
0d0293ac 4116 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4117 OPRNDS are of size 1. */
4118 for (i = 0; i < group_size; i++)
4119 {
9771b263 4120 op = oprnds[i];
4121 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4122 &def, &dt);
b8698a0f 4123 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4124 dr_chain[i] = vec_oprnd;
4125 oprnds[i] = vec_oprnd;
ebfd146a 4126 }
4127 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4128 TYPE_SIZE_UNIT (aggr_type));
4129 }
4130
272c6793 4131 if (store_lanes_p)
ebfd146a 4132 {
272c6793 4133 tree vec_array;
267d3070 4134
4135 /* Combine all the vectors into an array. */
4136 vec_array = create_vector_array (vectype, vec_num);
4137 for (i = 0; i < vec_num; i++)
c2d7ab2a 4138 {
9771b263 4139 vec_oprnd = dr_chain[i];
272c6793 4140 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 4141 }
b8698a0f 4142
4143 /* Emit:
4144 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4145 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4146 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4147 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 4148 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4149 }
4150 else
4151 {
4152 new_stmt = NULL;
0d0293ac 4153 if (grouped_store)
272c6793 4154 {
9771b263 4155 result_chain.create (group_size);
4156 /* Permute. */
4157 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4158 &result_chain);
4159 }
c2d7ab2a 4160
4161 next_stmt = first_stmt;
4162 for (i = 0; i < vec_num; i++)
4163 {
644ffefd 4164 unsigned align, misalign;
4165
4166 if (i > 0)
4167 /* Bump the vector pointer. */
4168 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4169 stmt, NULL_TREE);
4170
4171 if (slp)
9771b263 4172 vec_oprnd = vec_oprnds[i];
4173 else if (grouped_store)
4174 /* For grouped stores vectorized defs are interleaved in
272c6793 4175 vect_permute_store_chain(). */
9771b263 4176 vec_oprnd = result_chain[i];
4177
4178 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4179 build_int_cst (reference_alias_ptr_type
4180 (DR_REF (first_dr)), 0));
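 /* Record what is known about the pointer's alignment below: full
 vector alignment, a known misalignment, or, when DR_MISALIGNMENT
 is -1 (unknown at compile time), only the element alignment. */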
644ffefd 4181 align = TYPE_ALIGN_UNIT (vectype);
272c6793 4182 if (aligned_access_p (first_dr))
644ffefd 4183 misalign = 0;
4184 else if (DR_MISALIGNMENT (first_dr) == -1)
4185 {
4186 TREE_TYPE (data_ref)
4187 = build_aligned_type (TREE_TYPE (data_ref),
4188 TYPE_ALIGN (elem_type));
4189 align = TYPE_ALIGN_UNIT (elem_type);
4190 misalign = 0;
4191 }
4192 else
4193 {
4194 TREE_TYPE (data_ref)
4195 = build_aligned_type (TREE_TYPE (data_ref),
4196 TYPE_ALIGN (elem_type));
644ffefd 4197 misalign = DR_MISALIGNMENT (first_dr);
272c6793 4198 }
4199 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4200 misalign);
c2d7ab2a 4201
4202 /* Arguments are ready. Create the new vector stmt. */
4203 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4205
4206 if (slp)
4207 continue;
4208
e14c1050 4209 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4210 if (!next_stmt)
4211 break;
4212 }
ebfd146a 4213 }
4214 if (!slp)
4215 {
4216 if (j == 0)
4217 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4218 else
4219 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4220 prev_stmt_info = vinfo_for_stmt (new_stmt);
4221 }
4222 }
4223
4224 dr_chain.release ();
4225 oprnds.release ();
4226 result_chain.release ();
4227 vec_oprnds.release ();
4228
4229 return true;
4230}
4231
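/* Illustrative sketch only (not part of the vectorizer): how the
   selector vectors behind the VS5/VS6 interleaving permutations shown
   in the comment inside vectorizable_store can be computed for
   NUNITS-element vectors.  The helper name is an assumption of the
   example.  For nunits == 8 this produces
   { 0, 8, 1, 9, 2, 10, 3, 11 } and { 4, 12, 5, 13, 6, 14, 7, 15 }.  */

static void
example_interleave_selectors (int nunits, unsigned char *sel_lo,
                              unsigned char *sel_hi)
{
  int i;

  for (i = 0; i < nunits / 2; i++)
    {
      /* Low result: even slots from the first vector's low half,
         odd slots from the second vector's low half.  */
      sel_lo[2 * i] = i;
      sel_lo[2 * i + 1] = i + nunits;
      /* High result: the same scheme applied to the high halves.  */
      sel_hi[2 * i] = i + nunits / 2;
      sel_hi[2 * i + 1] = i + nunits + nunits / 2;
    }
}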
4232/* Given a vector type VECTYPE and permutation SEL returns
4233 the VECTOR_CST mask that implements the permutation of the
4234 vector elements. If that is impossible to do, returns NULL. */
a1e53f3f 4235
4236tree
4237vect_gen_perm_mask (tree vectype, unsigned char *sel)
a1e53f3f 4238{
d2a12ae7 4239 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 4240 int i, nunits;
a1e53f3f 4241
22e4dee7 4242 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4243
4244 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4245 return NULL;
4246
4247 mask_elt_type = lang_hooks.types.type_for_mode
4248 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 4249 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 4250
d2a12ae7 4251 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 4252 for (i = nunits - 1; i >= 0; i--)
4253 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4254 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 4255
2635892a 4256 return mask_vec;
4257}
4258
4259/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4260 reversal of the vector elements. If that is impossible to do,
4261 returns NULL. */
4262
4263static tree
4264perm_mask_for_reverse (tree vectype)
4265{
4266 int i, nunits;
4267 unsigned char *sel;
4268
4269 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4270 sel = XALLOCAVEC (unsigned char, nunits);
4271
4272 for (i = 0; i < nunits; ++i)
4273 sel[i] = nunits - 1 - i;
4274
3fcc1b55 4275 return vect_gen_perm_mask (vectype, sel);
4276}
4277
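/* Illustrative sketch only (not part of the vectorizer): the element
   selection a VEC_PERM_EXPR performs, modelled on plain arrays, with
   selector entries below NUNITS picking from X and entries from
   NUNITS upwards picking from Y.  With the reversal selector built
   above (sel[i] = nunits - 1 - i) this returns X reversed.  The
   helper name is an assumption of the example.  */

static void
example_vec_perm (int nunits, const int *x, const int *y,
                  const unsigned char *sel, int *out)
{
  int i;

  for (i = 0; i < nunits; i++)
    out[i] = sel[i] < nunits ? x[sel[i]] : y[sel[i] - nunits];
}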
 4278 /* Given vector variables X and Y that were generated for the scalar
 4279 STMT, generate instructions to permute the vector elements of X and Y
 4280 using permutation mask MASK_VEC, insert them at *GSI and return the
 4281 permuted vector variable. */
4282
4283static tree
4284permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4285 gimple_stmt_iterator *gsi)
4286{
4287 tree vectype = TREE_TYPE (x);
aec7ae7d 4288 tree perm_dest, data_ref;
4289 gimple perm_stmt;
4290
a1e53f3f 4291 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
aec7ae7d 4292 data_ref = make_ssa_name (perm_dest, NULL);
4293
4294 /* Generate the permute statement. */
4295 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4296 x, y, mask_vec);
4297 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4298
4299 return data_ref;
4300}
4301
4302/* vectorizable_load.
4303
 4304 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4305 can be vectorized.
4306 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4307 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4308 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4309
4310static bool
4311vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4312 slp_tree slp_node, slp_instance slp_node_instance)
4313{
4314 tree scalar_dest;
4315 tree vec_dest = NULL;
4316 tree data_ref = NULL;
4317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 4318 stmt_vec_info prev_stmt_info;
ebfd146a 4319 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 4320 struct loop *loop = NULL;
ebfd146a 4321 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 4322 bool nested_in_vect_loop = false;
4323 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4324 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 4325 tree elem_type;
ebfd146a 4326 tree new_temp;
947131ba 4327 enum machine_mode mode;
4328 gimple new_stmt = NULL;
4329 tree dummy;
4330 enum dr_alignment_support alignment_support_scheme;
4331 tree dataref_ptr = NULL_TREE;
4332 gimple ptr_incr;
4333 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4334 int ncopies;
4335 int i, j, group_size;
4336 tree msq = NULL_TREE, lsq;
4337 tree offset = NULL_TREE;
4338 tree realignment_token = NULL_TREE;
4339 gimple phi = NULL;
9771b263 4340 vec<tree> dr_chain = vec<tree>();
0d0293ac 4341 bool grouped_load = false;
272c6793 4342 bool load_lanes_p = false;
ebfd146a 4343 gimple first_stmt;
ebfd146a 4344 bool inv_p;
319e6439 4345 bool negative = false;
4346 bool compute_in_loop = false;
4347 struct loop *at_loop;
4348 int vec_num;
4349 bool slp = (slp_node != NULL);
4350 bool slp_perm = false;
4351 enum tree_code code;
4352 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4353 int vf;
272c6793 4354 tree aggr_type;
4355 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4356 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
7d75abc8 4357 tree stride_base, stride_step;
4358 int gather_scale = 1;
4359 enum vect_def_type gather_dt = vect_unknown_def_type;
4360
4361 if (loop_vinfo)
4362 {
4363 loop = LOOP_VINFO_LOOP (loop_vinfo);
4364 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4365 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4366 }
4367 else
3533e503 4368 vf = 1;
4369
4370 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4371 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 4372 case of SLP. */
437f4a00 4373 if (slp || PURE_SLP_STMT (stmt_info))
4374 ncopies = 1;
4375 else
4376 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4377
4378 gcc_assert (ncopies >= 1);
4379
4380 /* FORNOW. This restriction should be relaxed. */
4381 if (nested_in_vect_loop && ncopies > 1)
4382 {
73fbfcad 4383 if (dump_enabled_p ())
4384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4385 "multiple types in nested loop.");
4386 return false;
4387 }
4388
a70d6342 4389 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4390 return false;
4391
8644a673 4392 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4393 return false;
4394
4395 /* Is vectorizable load? */
4396 if (!is_gimple_assign (stmt))
4397 return false;
4398
4399 scalar_dest = gimple_assign_lhs (stmt);
4400 if (TREE_CODE (scalar_dest) != SSA_NAME)
4401 return false;
4402
4403 code = gimple_assign_rhs_code (stmt);
4404 if (code != ARRAY_REF
4405 && code != INDIRECT_REF
4406 && code != COMPONENT_REF
4407 && code != IMAGPART_EXPR
70f34814 4408 && code != REALPART_EXPR
4409 && code != MEM_REF
4410 && TREE_CODE_CLASS (code) != tcc_declaration)
4411 return false;
4412
4413 if (!STMT_VINFO_DATA_REF (stmt_info))
4414 return false;
4415
7b7b1813 4416 elem_type = TREE_TYPE (vectype);
947131ba 4417 mode = TYPE_MODE (vectype);
4418
4419 /* FORNOW. In some cases can vectorize even if data-type not supported
4420 (e.g. - data copies). */
947131ba 4421 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 4422 {
73fbfcad 4423 if (dump_enabled_p ())
4424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4425 "Aligned load, but unsupported type.");
4426 return false;
4427 }
4428
ebfd146a 4429 /* Check if the load is a part of an interleaving chain. */
0d0293ac 4430 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 4431 {
0d0293ac 4432 grouped_load = true;
ebfd146a 4433 /* FORNOW */
aec7ae7d 4434 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 4435
e14c1050 4436 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4437 if (!slp && !PURE_SLP_STMT (stmt_info))
4438 {
e14c1050 4439 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4440 if (vect_load_lanes_supported (vectype, group_size))
4441 load_lanes_p = true;
0d0293ac 4442 else if (!vect_grouped_load_supported (vectype, group_size))
4443 return false;
4444 }
4445 }
4446
a1e53f3f 4447
4448 if (STMT_VINFO_GATHER_P (stmt_info))
4449 {
4450 gimple def_stmt;
4451 tree def;
4452 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4453 &gather_off, &gather_scale);
4454 gcc_assert (gather_decl);
24ee1384 4455 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4456 &def_stmt, &def, &gather_dt,
4457 &gather_off_vectype))
4458 {
73fbfcad 4459 if (dump_enabled_p ())
4460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4461 "gather index use not simple.");
4462 return false;
4463 }
4464 }
4465 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4466 {
4467 if (!vect_check_strided_load (stmt, loop_vinfo,
4468 &stride_base, &stride_step))
4469 return false;
4470 }
4471 else
4472 {
4473 negative = tree_int_cst_compare (nested_in_vect_loop
4474 ? STMT_VINFO_DR_STEP (stmt_info)
4475 : DR_STEP (dr),
4476 size_zero_node) < 0;
4477 if (negative && ncopies > 1)
4478 {
73fbfcad 4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "multiple types with negative step.");
4482 return false;
4483 }
4484
4485 if (negative)
4486 {
4487 gcc_assert (!grouped_load);
4488 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4489 if (alignment_support_scheme != dr_aligned
4490 && alignment_support_scheme != dr_unaligned_supported)
4491 {
73fbfcad 4492 if (dump_enabled_p ())
4493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4494 "negative step but alignment required.");
4495 return false;
4496 }
4497 if (!perm_mask_for_reverse (vectype))
4498 {
73fbfcad 4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4501 "negative step and reversing not supported.");
4502 return false;
4503 }
4504 }
7d75abc8 4505 }
aec7ae7d 4506
4507 if (!vec_stmt) /* transformation not required. */
4508 {
4509 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
92345349 4510 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4511 return true;
4512 }
4513
73fbfcad 4514 if (dump_enabled_p ())
4515 dump_printf_loc (MSG_NOTE, vect_location,
4516 "transform load. ncopies = %d", ncopies);
4517
4518 /** Transform. **/
4519
4520 if (STMT_VINFO_GATHER_P (stmt_info))
4521 {
4522 tree vec_oprnd0 = NULL_TREE, op;
4523 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4524 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4525 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4526 edge pe = loop_preheader_edge (loop);
4527 gimple_seq seq;
4528 basic_block new_bb;
4529 enum { NARROW, NONE, WIDEN } modifier;
4530 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4531
4532 if (nunits == gather_off_nunits)
4533 modifier = NONE;
4534 else if (nunits == gather_off_nunits / 2)
4535 {
4536 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4537 modifier = WIDEN;
4538
4539 for (i = 0; i < gather_off_nunits; ++i)
4540 sel[i] = i | nunits;
4541
3fcc1b55 4542 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4543 gcc_assert (perm_mask != NULL_TREE);
4544 }
4545 else if (nunits == gather_off_nunits * 2)
4546 {
4547 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4548 modifier = NARROW;
4549
4550 for (i = 0; i < nunits; ++i)
4551 sel[i] = i < gather_off_nunits
4552 ? i : i + nunits - gather_off_nunits;
4553
3fcc1b55 4554 perm_mask = vect_gen_perm_mask (vectype, sel);
4555 gcc_assert (perm_mask != NULL_TREE);
4556 ncopies *= 2;
4557 }
4558 else
4559 gcc_unreachable ();
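 /* For instance (assumed sizes, not taken from a target): with
 nunits == 8 and gather_off_nunits == 4 the NARROW selector built
 above is { 0, 1, 2, 3, 8, 9, 10, 11 }, fusing the live halves of
 two gather results; with nunits == 4 and gather_off_nunits == 8
 the WIDEN selector is { 4, 5, 6, 7, 4, 5, 6, 7 }, re-presenting
 the upper half of the offset vector for the odd-numbered copies. */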
4560
4561 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4562 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4563 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4564 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4565 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4566 scaletype = TREE_VALUE (arglist);
4567 gcc_checking_assert (types_compatible_p (srctype, rettype)
4568 && types_compatible_p (srctype, masktype));
4569
4570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4571
4572 ptr = fold_convert (ptrtype, gather_base);
4573 if (!is_gimple_min_invariant (ptr))
4574 {
4575 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4576 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4577 gcc_assert (!new_bb);
4578 }
4579
4580 /* Currently we support only unconditional gather loads,
4581 so mask should be all ones. */
4582 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4583 mask = build_int_cst (TREE_TYPE (masktype), -1);
4584 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4585 {
4586 REAL_VALUE_TYPE r;
4587 long tmp[6];
4588 for (j = 0; j < 6; ++j)
4589 tmp[j] = -1;
4590 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4591 mask = build_real (TREE_TYPE (masktype), r);
4592 }
4593 else
4594 gcc_unreachable ();
4595 mask = build_vector_from_val (masktype, mask);
4596 mask = vect_init_vector (stmt, mask, masktype, NULL);
4597
4598 scale = build_int_cst (scaletype, gather_scale);
4599
4600 prev_stmt_info = NULL;
4601 for (j = 0; j < ncopies; ++j)
4602 {
4603 if (modifier == WIDEN && (j & 1))
4604 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4605 perm_mask, stmt, gsi);
4606 else if (j == 0)
4607 op = vec_oprnd0
4608 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4609 else
4610 op = vec_oprnd0
4611 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4612
4613 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4614 {
4615 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4616 == TYPE_VECTOR_SUBPARTS (idxtype));
4617 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4618 var = make_ssa_name (var, NULL);
4619 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4620 new_stmt
4621 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4622 op, NULL_TREE);
4623 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4624 op = var;
4625 }
4626
4627 new_stmt
4628 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4629
4630 if (!useless_type_conversion_p (vectype, rettype))
4631 {
4632 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4633 == TYPE_VECTOR_SUBPARTS (rettype));
4634 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4635 op = make_ssa_name (var, new_stmt);
4636 gimple_call_set_lhs (new_stmt, op);
4637 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4638 var = make_ssa_name (vec_dest, NULL);
4639 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4640 new_stmt
4641 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4642 NULL_TREE);
4643 }
4644 else
4645 {
4646 var = make_ssa_name (vec_dest, new_stmt);
4647 gimple_call_set_lhs (new_stmt, var);
4648 }
4649
4650 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4651
4652 if (modifier == NARROW)
4653 {
4654 if ((j & 1) == 0)
4655 {
4656 prev_res = var;
4657 continue;
4658 }
4659 var = permute_vec_elements (prev_res, var,
4660 perm_mask, stmt, gsi);
4661 new_stmt = SSA_NAME_DEF_STMT (var);
4662 }
4663
4664 if (prev_stmt_info == NULL)
4665 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4666 else
4667 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4668 prev_stmt_info = vinfo_for_stmt (new_stmt);
4669 }
4670 return true;
4671 }
4672 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4673 {
4674 gimple_stmt_iterator incr_gsi;
4675 bool insert_after;
4676 gimple incr;
4677 tree offvar;
4678 tree ref = DR_REF (dr);
4679 tree ivstep;
4680 tree running_off;
9771b263 4681 vec<constructor_elt, va_gc> *v = NULL;
4682 gimple_seq stmts = NULL;
4683
4684 gcc_assert (stride_base && stride_step);
4685
4686 /* For a load with loop-invariant (but other than power-of-2)
4687 stride (i.e. not a grouped access) like so:
4688
4689 for (i = 0; i < n; i += stride)
4690 ... = array[i];
4691
4692 we generate a new induction variable and new accesses to
4693 form a new vector (or vectors, depending on ncopies):
4694
4695 for (j = 0; ; j += VF*stride)
4696 tmp1 = array[j];
4697 tmp2 = array[j + stride];
4698 ...
4699 vectemp = {tmp1, tmp2, ...}
4700 */
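 /* A concrete instance of the above (an assumed example): with
 stride == 3, vf == 4 and nunits == 4, the new IV advances by
 3 * 4 = 12 scalar elements per vector iteration, and each vector
 is assembled from array[j], array[j + 3], array[j + 6] and
 array[j + 9]. */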
4701
4702 ivstep = stride_step;
4703 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4704 build_int_cst (TREE_TYPE (ivstep), vf));
4705
4706 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4707
4708 create_iv (stride_base, ivstep, NULL,
4709 loop, &incr_gsi, insert_after,
4710 &offvar, NULL);
4711 incr = gsi_stmt (incr_gsi);
4712 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4713
4714 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4715 if (stmts)
4716 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4717
4718 prev_stmt_info = NULL;
4719 running_off = offvar;
4720 for (j = 0; j < ncopies; j++)
4721 {
4722 tree vec_inv;
4723
9771b263 4724 vec_alloc (v, nunits);
4725 for (i = 0; i < nunits; i++)
4726 {
4727 tree newref, newoff;
4728 gimple incr;
4729 if (TREE_CODE (ref) == ARRAY_REF)
4730 {
4731 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4732 unshare_expr (TREE_OPERAND (ref, 0)),
4733 running_off,
4734 NULL_TREE, NULL_TREE);
4735 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4736 TREE_TYPE (newref)))
4737 newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
4738 newref);
4739 }
7d75abc8 4740 else
cded4e9e 4741 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4742 running_off,
4743 TREE_OPERAND (ref, 1));
4744
4745 newref = force_gimple_operand_gsi (gsi, newref, true,
4746 NULL_TREE, true,
4747 GSI_SAME_STMT);
4748 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
070ecdfd 4749 newoff = copy_ssa_name (running_off, NULL);
4750 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4751 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4752 running_off, stride_step);
4753 else
4754 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4755 running_off, stride_step);
4756 vect_finish_stmt_generation (stmt, incr, gsi);
4757
4758 running_off = newoff;
4759 }
4760
4761 vec_inv = build_constructor (vectype, v);
4762 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4763 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4764
4765 if (j == 0)
4766 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4767 else
4768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4769 prev_stmt_info = vinfo_for_stmt (new_stmt);
4770 }
4771 return true;
4772 }
aec7ae7d 4773
0d0293ac 4774 if (grouped_load)
ebfd146a 4775 {
e14c1050 4776 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 4777 if (slp
4778 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
4779 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4780 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 4781
4782 /* Check if the chain of loads is already vectorized. */
4783 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4784 {
4785 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4786 return true;
4787 }
4788 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 4789 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4790
4791 /* VEC_NUM is the number of vect stmts to be created for this group. */
4792 if (slp)
4793 {
0d0293ac 4794 grouped_load = false;
ebfd146a 4795 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 4796 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
4797 slp_perm = true;
4798 }
4799 else
4800 vec_num = group_size;
4801 }
4802 else
4803 {
4804 first_stmt = stmt;
4805 first_dr = dr;
4806 group_size = vec_num = 1;
4807 }
4808
720f5239 4809 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 4810 gcc_assert (alignment_support_scheme);
4811 /* Targets with load-lane instructions must not require explicit
4812 realignment. */
4813 gcc_assert (!load_lanes_p
4814 || alignment_support_scheme == dr_aligned
4815 || alignment_support_scheme == dr_unaligned_supported);
4816
4817 /* In case the vectorization factor (VF) is bigger than the number
4818 of elements that we can fit in a vectype (nunits), we have to generate
 4819 more than one vector stmt, i.e., we need to "unroll" the
ff802fa1 4820 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 4821 from one copy of the vector stmt to the next, in the field
ff802fa1 4822 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 4823 stages to find the correct vector defs to be used when vectorizing
4824 stmts that use the defs of the current stmt. The example below
4825 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4826 need to create 4 vectorized stmts):
4827
4828 before vectorization:
4829 RELATED_STMT VEC_STMT
4830 S1: x = memref - -
4831 S2: z = x + 1 - -
4832
4833 step 1: vectorize stmt S1:
4834 We first create the vector stmt VS1_0, and, as usual, record a
4835 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4836 Next, we create the vector stmt VS1_1, and record a pointer to
4837 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 4838 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4839 stmts and pointers:
4840 RELATED_STMT VEC_STMT
4841 VS1_0: vx0 = memref0 VS1_1 -
4842 VS1_1: vx1 = memref1 VS1_2 -
4843 VS1_2: vx2 = memref2 VS1_3 -
4844 VS1_3: vx3 = memref3 - -
4845 S1: x = load - VS1_0
4846 S2: z = x + 1 - -
4847
 4848 See the documentation of vect_get_vec_def_for_stmt_copy for how the
 4849 information recorded in the RELATED_STMT field is used to vectorize
4850 stmt S2. */
4851
0d0293ac 4852 /* In case of interleaving (non-unit grouped access):
4853
4854 S1: x2 = &base + 2
4855 S2: x0 = &base
4856 S3: x1 = &base + 1
4857 S4: x3 = &base + 3
4858
b8698a0f 4859 Vectorized loads are created in the order of memory accesses
4860 starting from the access of the first stmt of the chain:
4861
4862 VS1: vx0 = &base
4863 VS2: vx1 = &base + vec_size*1
4864 VS3: vx3 = &base + vec_size*2
4865 VS4: vx4 = &base + vec_size*3
4866
4867 Then permutation statements are generated:
4868
4869 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4870 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4871 ...
4872
4873 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4874 (the order of the data-refs in the output of vect_permute_load_chain
4875 corresponds to the order of scalar stmts in the interleaving chain - see
4876 the documentation of vect_permute_load_chain()).
4877 The generation of permutation stmts and recording them in
0d0293ac 4878 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 4879
b8698a0f 4880 In case of both multiple types and interleaving, the vector loads and
4881 permutation stmts above are created for every copy. The result vector
4882 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4883 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4884
4885 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4886 on a target that supports unaligned accesses (dr_unaligned_supported)
4887 we generate the following code:
4888 p = initial_addr;
4889 indx = 0;
4890 loop {
4891 p = p + indx * vectype_size;
4892 vec_dest = *(p);
4893 indx = indx + 1;
4894 }
4895
4896 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 4897 does not support unaligned accesses (dr_explicit_realign_optimized) -
4898 then generate the following code, in which the data in each iteration is
4899 obtained by two vector loads, one from the previous iteration, and one
4900 from the current iteration:
4901 p1 = initial_addr;
4902 msq_init = *(floor(p1))
4903 p2 = initial_addr + VS - 1;
4904 realignment_token = call target_builtin;
4905 indx = 0;
4906 loop {
4907 p2 = p2 + indx * vectype_size
4908 lsq = *(floor(p2))
4909 vec_dest = realign_load (msq, lsq, realignment_token)
4910 indx = indx + 1;
4911 msq = lsq;
4912 } */
4913
4914 /* If the misalignment remains the same throughout the execution of the
4915 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 4916 preheader. Otherwise, it needs to be created inside the loop.
4917 This can only occur when vectorizing memory accesses in the inner-loop
4918 nested within an outer-loop that is being vectorized. */
4919
d1e4b493 4920 if (nested_in_vect_loop
211bea38 4921 && (TREE_INT_CST_LOW (DR_STEP (dr))
4922 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4923 {
4924 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4925 compute_in_loop = true;
4926 }
4927
4928 if ((alignment_support_scheme == dr_explicit_realign_optimized
4929 || alignment_support_scheme == dr_explicit_realign)
4930 && !compute_in_loop)
4931 {
4932 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4933 alignment_support_scheme, NULL_TREE,
4934 &at_loop);
4935 if (alignment_support_scheme == dr_explicit_realign_optimized)
4936 {
4937 phi = SSA_NAME_DEF_STMT (msq);
4938 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4939 }
4940 }
4941 else
4942 at_loop = loop;
4943
4944 if (negative)
4945 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4946
4947 if (load_lanes_p)
4948 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4949 else
4950 aggr_type = vectype;
4951
4952 prev_stmt_info = NULL;
4953 for (j = 0; j < ncopies; j++)
b8698a0f 4954 {
272c6793 4955 /* 1. Create the vector or array pointer update chain. */
ebfd146a 4956 if (j == 0)
272c6793 4957 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4958 offset, &dummy, gsi,
4959 &ptr_incr, false, &inv_p);
ebfd146a 4960 else
4961 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4962 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 4963
0d0293ac 4964 if (grouped_load || slp_perm)
9771b263 4965 dr_chain.create (vec_num);
5ce1ee7f 4966
272c6793 4967 if (load_lanes_p)
ebfd146a 4968 {
4969 tree vec_array;
4970
4971 vec_array = create_vector_array (vectype, vec_num);
4972
4973 /* Emit:
4974 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4975 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4976 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4977 gimple_call_set_lhs (new_stmt, vec_array);
4978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 4979
4980 /* Extract each vector into an SSA_NAME. */
4981 for (i = 0; i < vec_num; i++)
ebfd146a 4982 {
4983 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4984 vec_array, i);
9771b263 4985 dr_chain.quick_push (new_temp);
4986 }
4987
4988 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 4989 vect_record_grouped_load_vectors (stmt, dr_chain);
4990 }
4991 else
4992 {
4993 for (i = 0; i < vec_num; i++)
4994 {
4995 if (i > 0)
4996 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4997 stmt, NULL_TREE);
4998
4999 /* 2. Create the vector-load in the loop. */
5000 switch (alignment_support_scheme)
5001 {
5002 case dr_aligned:
5003 case dr_unaligned_supported:
be1ac4ec 5004 {
5005 unsigned int align, misalign;
5006
5007 data_ref
5008 = build2 (MEM_REF, vectype, dataref_ptr,
5009 build_int_cst (reference_alias_ptr_type
5010 (DR_REF (first_dr)), 0));
644ffefd 5011 align = TYPE_ALIGN_UNIT (vectype);
5012 if (alignment_support_scheme == dr_aligned)
5013 {
5014 gcc_assert (aligned_access_p (first_dr));
644ffefd 5015 misalign = 0;
5016 }
5017 else if (DR_MISALIGNMENT (first_dr) == -1)
5018 {
5019 TREE_TYPE (data_ref)
5020 = build_aligned_type (TREE_TYPE (data_ref),
5021 TYPE_ALIGN (elem_type));
5022 align = TYPE_ALIGN_UNIT (elem_type);
5023 misalign = 0;
5024 }
5025 else
5026 {
5027 TREE_TYPE (data_ref)
5028 = build_aligned_type (TREE_TYPE (data_ref),
5029 TYPE_ALIGN (elem_type));
644ffefd 5030 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5031 }
5032 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5033 align, misalign);
272c6793 5034 break;
be1ac4ec 5035 }
272c6793 5036 case dr_explicit_realign:
267d3070 5037 {
5038 tree ptr, bump;
5039 tree vs_minus_1;
5040
5041 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5042
5043 if (compute_in_loop)
5044 msq = vect_setup_realignment (first_stmt, gsi,
5045 &realignment_token,
5046 dr_explicit_realign,
5047 dataref_ptr, NULL);
5048
070ecdfd 5049 ptr = copy_ssa_name (dataref_ptr, NULL);
272c6793 5050 new_stmt = gimple_build_assign_with_ops
070ecdfd 5051 (BIT_AND_EXPR, ptr, dataref_ptr,
5052 build_int_cst
5053 (TREE_TYPE (dataref_ptr),
5054 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5056 data_ref
5057 = build2 (MEM_REF, vectype, ptr,
5058 build_int_cst (reference_alias_ptr_type
5059 (DR_REF (first_dr)), 0));
5060 vec_dest = vect_create_destination_var (scalar_dest,
5061 vectype);
5062 new_stmt = gimple_build_assign (vec_dest, data_ref);
5063 new_temp = make_ssa_name (vec_dest, new_stmt);
5064 gimple_assign_set_lhs (new_stmt, new_temp);
5065 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5066 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5068 msq = new_temp;
5069
5070 bump = size_binop (MULT_EXPR, vs_minus_1,
7b7b1813 5071 TYPE_SIZE_UNIT (elem_type));
5072 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5073 new_stmt = gimple_build_assign_with_ops
5074 (BIT_AND_EXPR, NULL_TREE, ptr,
5075 build_int_cst
5076 (TREE_TYPE (ptr),
5077 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 5078 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5079 gimple_assign_set_lhs (new_stmt, ptr);
5080 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5081 data_ref
5082 = build2 (MEM_REF, vectype, ptr,
5083 build_int_cst (reference_alias_ptr_type
5084 (DR_REF (first_dr)), 0));
5085 break;
267d3070 5086 }
272c6793 5087 case dr_explicit_realign_optimized:
070ecdfd 5088 new_temp = copy_ssa_name (dataref_ptr, NULL);
272c6793 5089 new_stmt = gimple_build_assign_with_ops
070ecdfd 5090 (BIT_AND_EXPR, new_temp, dataref_ptr,
5091 build_int_cst
5092 (TREE_TYPE (dataref_ptr),
5093 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5094 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5095 data_ref
5096 = build2 (MEM_REF, vectype, new_temp,
5097 build_int_cst (reference_alias_ptr_type
5098 (DR_REF (first_dr)), 0));
5099 break;
5100 default:
5101 gcc_unreachable ();
5102 }
ebfd146a 5103 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 5104 new_stmt = gimple_build_assign (vec_dest, data_ref);
5105 new_temp = make_ssa_name (vec_dest, new_stmt);
5106 gimple_assign_set_lhs (new_stmt, new_temp);
5107 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5108
5109 /* 3. Handle explicit realignment if necessary/supported.
5110 Create in loop:
5111 vec_dest = realign_load (msq, lsq, realignment_token) */
5112 if (alignment_support_scheme == dr_explicit_realign_optimized
5113 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 5114 {
5115 lsq = gimple_assign_lhs (new_stmt);
5116 if (!realignment_token)
5117 realignment_token = dataref_ptr;
5118 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5119 new_stmt
5120 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5121 vec_dest, msq, lsq,
5122 realignment_token);
5123 new_temp = make_ssa_name (vec_dest, new_stmt);
5124 gimple_assign_set_lhs (new_stmt, new_temp);
5125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5126
5127 if (alignment_support_scheme == dr_explicit_realign_optimized)
5128 {
5129 gcc_assert (phi);
5130 if (i == vec_num - 1 && j == ncopies - 1)
5131 add_phi_arg (phi, lsq,
5132 loop_latch_edge (containing_loop),
9e227d60 5133 UNKNOWN_LOCATION);
5134 msq = lsq;
5135 }
ebfd146a 5136 }
ebfd146a 5137
5138 /* 4. Handle invariant-load. */
5139 if (inv_p && !bb_vinfo)
ebfd146a 5140 {
ab70d825 5141 gimple_stmt_iterator gsi2 = *gsi;
0d0293ac 5142 gcc_assert (!grouped_load);
ab70d825 5143 gsi_next (&gsi2);
418b7df3 5144 new_temp = vect_init_vector (stmt, scalar_dest,
5145 vectype, &gsi2);
5146 new_stmt = SSA_NAME_DEF_STMT (new_temp);
272c6793 5147 }
ebfd146a 5148
5149 if (negative)
5150 {
5151 tree perm_mask = perm_mask_for_reverse (vectype);
5152 new_temp = permute_vec_elements (new_temp, new_temp,
5153 perm_mask, stmt, gsi);
5154 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5155 }
267d3070 5156
272c6793 5157 /* Collect vector loads and later create their permutation in
5158 vect_transform_grouped_load (). */
5159 if (grouped_load || slp_perm)
9771b263 5160 dr_chain.quick_push (new_temp);
267d3070 5161
5162 /* Store vector loads in the corresponding SLP_NODE. */
5163 if (slp && !slp_perm)
9771b263 5164 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 5165 }
5166 }
5167
5168 if (slp && !slp_perm)
5169 continue;
5170
5171 if (slp_perm)
5172 {
a70d6342 5173 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5174 slp_node_instance, false))
5175 {
9771b263 5176 dr_chain.release ();
5177 return false;
5178 }
5179 }
5180 else
5181 {
0d0293ac 5182 if (grouped_load)
ebfd146a 5183 {
272c6793 5184 if (!load_lanes_p)
0d0293ac 5185 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 5186 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5187 }
5188 else
5189 {
5190 if (j == 0)
5191 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5192 else
5193 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5194 prev_stmt_info = vinfo_for_stmt (new_stmt);
5195 }
5196 }
9771b263 5197 dr_chain.release ();
5198 }
5199
5200 return true;
5201}
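/* Editorial sketch (not part of the original source): the RELATED_STMT
   chaining documented inside vectorizable_load above can be consumed
   with a simple walk, assuming only accessors already used in this file:

     gimple vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (scalar_stmt));
     while (vs)
       {
         ... work with one vector copy VS ...
         vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));
       }

   With VF=16 and nunits=4 this visits VS1_0 through VS1_3 in order.  */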
5202
5203/* Function vect_is_simple_cond.
b8698a0f 5204
5205 Input:
5206 LOOP - the loop that is being vectorized.
5207 COND - Condition that is checked for simple use.
5208
5209 Output:
5210 *COMP_VECTYPE - the vector type for the comparison.
5211
5212 Returns whether a COND can be vectorized. Checks whether
 5213 condition operands are supportable using vect_is_simple_use. */
5214
87aab9b2 5215static bool
5216vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5217 bb_vec_info bb_vinfo, tree *comp_vectype)
5218{
5219 tree lhs, rhs;
5220 tree def;
5221 enum vect_def_type dt;
e9e1d143 5222 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5223
5224 if (!COMPARISON_CLASS_P (cond))
5225 return false;
5226
5227 lhs = TREE_OPERAND (cond, 0);
5228 rhs = TREE_OPERAND (cond, 1);
5229
5230 if (TREE_CODE (lhs) == SSA_NAME)
5231 {
5232 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5233 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5234 &lhs_def_stmt, &def, &dt, &vectype1))
5235 return false;
5236 }
5237 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5238 && TREE_CODE (lhs) != FIXED_CST)
5239 return false;
5240
5241 if (TREE_CODE (rhs) == SSA_NAME)
5242 {
5243 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5244 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5245 &rhs_def_stmt, &def, &dt, &vectype2))
5246 return false;
5247 }
f7e531cf 5248 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5249 && TREE_CODE (rhs) != FIXED_CST)
5250 return false;
5251
e9e1d143 5252 *comp_vectype = vectype1 ? vectype1 : vectype2;
5253 return true;
5254}
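/* Editorial example (not part of the original source): for a condition
   like "a_1 < b_2" both operands are SSA names, so each is checked with
   vect_is_simple_use_1 and *COMP_VECTYPE is taken from whichever operand
   supplies a vector type; for "a_1 < 5" only the LHS contributes a
   vector type and the constant RHS is accepted as an INTEGER_CST.  */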
5255
5256/* vectorizable_condition.
5257
 5258 Check if STMT is a conditional modify expression that can be vectorized.
5259 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5260 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5261 at GSI.
5262
 5263 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 5264 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 5265 the else clause if it is 2).
5266
5267 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5268
4bbe8262 5269bool
ebfd146a 5270vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5271 gimple *vec_stmt, tree reduc_def, int reduc_index,
5272 slp_tree slp_node)
5273{
5274 tree scalar_dest = NULL_TREE;
5275 tree vec_dest = NULL_TREE;
5276 tree cond_expr, then_clause, else_clause;
5277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5278 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 5279 tree comp_vectype = NULL_TREE;
5280 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5281 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5282 tree vec_compare, vec_cond_expr;
5283 tree new_temp;
5284 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 5285 tree def;
a855b1b1 5286 enum vect_def_type dt, dts[4];
ebfd146a 5287 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 5288 int ncopies;
ebfd146a 5289 enum tree_code code;
a855b1b1 5290 stmt_vec_info prev_stmt_info = NULL;
5291 int i, j;
5292 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5293 vec<tree> vec_oprnds0 = vec<tree>();
5294 vec<tree> vec_oprnds1 = vec<tree>();
5295 vec<tree> vec_oprnds2 = vec<tree>();
5296 vec<tree> vec_oprnds3 = vec<tree>();
784fb9b3 5297 tree vec_cmp_type = vectype;
b8698a0f 5298
5299 if (slp_node || PURE_SLP_STMT (stmt_info))
5300 ncopies = 1;
5301 else
5302 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
437f4a00 5303
ebfd146a 5304 gcc_assert (ncopies >= 1);
a855b1b1 5305 if (reduc_index && ncopies > 1)
5306 return false; /* FORNOW */
5307
5308 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5309 return false;
5310
5311 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5312 return false;
5313
5314 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5315 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5316 && reduc_def))
5317 return false;
5318
ebfd146a 5319 /* FORNOW: not yet supported. */
b8698a0f 5320 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5321 {
73fbfcad 5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "value used after loop.");
5325 return false;
5326 }
5327
5328 /* Is vectorizable conditional operation? */
5329 if (!is_gimple_assign (stmt))
5330 return false;
5331
5332 code = gimple_assign_rhs_code (stmt);
5333
5334 if (code != COND_EXPR)
5335 return false;
5336
5337 cond_expr = gimple_assign_rhs1 (stmt);
5338 then_clause = gimple_assign_rhs2 (stmt);
5339 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 5340
5341 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5342 &comp_vectype)
e9e1d143 5343 || !comp_vectype)
5344 return false;
5345
5346 if (TREE_CODE (then_clause) == SSA_NAME)
5347 {
5348 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 5349 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5350 &then_def_stmt, &def, &dt))
5351 return false;
5352 }
b8698a0f 5353 else if (TREE_CODE (then_clause) != INTEGER_CST
5354 && TREE_CODE (then_clause) != REAL_CST
5355 && TREE_CODE (then_clause) != FIXED_CST)
5356 return false;
5357
5358 if (TREE_CODE (else_clause) == SSA_NAME)
5359 {
5360 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 5361 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5362 &else_def_stmt, &def, &dt))
5363 return false;
5364 }
b8698a0f 5365 else if (TREE_CODE (else_clause) != INTEGER_CST
5366 && TREE_CODE (else_clause) != REAL_CST
5367 && TREE_CODE (else_clause) != FIXED_CST)
5368 return false;
5369
5370 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
5371 {
5372 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5373 tree cmp_type = build_nonstandard_integer_type (prec, 1);
5374 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5375 if (vec_cmp_type == NULL_TREE)
5376 return false;
5377 }
5378
b8698a0f 5379 if (!vec_stmt)
5380 {
5381 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 5382 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
5383 }
5384
5385 /* Transform. */
5386
5387 if (!slp_node)
5388 {
5389 vec_oprnds0.create (1);
5390 vec_oprnds1.create (1);
5391 vec_oprnds2.create (1);
5392 vec_oprnds3.create (1);
f7e531cf 5393 }
5394
5395 /* Handle def. */
5396 scalar_dest = gimple_assign_lhs (stmt);
5397 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5398
5399 /* Handle cond expr. */
5400 for (j = 0; j < ncopies; j++)
5401 {
f7e531cf 5402 gimple new_stmt = NULL;
5403 if (j == 0)
5404 {
5405 if (slp_node)
5406 {
5407 vec<tree> ops;
5408 ops.create (4);
5409 vec<slp_void_p> vec_defs;
5410
5411 vec_defs.create (4);
5412 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5413 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5414 ops.safe_push (then_clause);
5415 ops.safe_push (else_clause);
f7e531cf 5416 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5417 vec_oprnds3 = *((vec<tree> *) vec_defs.pop ());
5418 vec_oprnds2 = *((vec<tree> *) vec_defs.pop ());
5419 vec_oprnds1 = *((vec<tree> *) vec_defs.pop ());
5420 vec_oprnds0 = *((vec<tree> *) vec_defs.pop ());
f7e531cf 5421
5422 ops.release ();
5423 vec_defs.release ();
5424 }
5425 else
5426 {
5427 gimple gtemp;
5428 vec_cond_lhs =
5429 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5430 stmt, NULL);
5431 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5432 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5433
5434 vec_cond_rhs =
5435 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5436 stmt, NULL);
5437 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5438 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5439 if (reduc_index == 1)
5440 vec_then_clause = reduc_def;
5441 else
5442 {
5443 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5444 stmt, NULL);
24ee1384 5445 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5446 NULL, &gtemp, &def, &dts[2]);
5447 }
5448 if (reduc_index == 2)
5449 vec_else_clause = reduc_def;
5450 else
5451 {
5452 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 5453 stmt, NULL);
24ee1384 5454 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 5455 NULL, &gtemp, &def, &dts[3]);
f7e531cf 5456 }
5457 }
5458 }
5459 else
5460 {
f7e531cf 5461 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 5462 vec_oprnds0.pop ());
f7e531cf 5463 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 5464 vec_oprnds1.pop ());
a855b1b1 5465 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 5466 vec_oprnds2.pop ());
a855b1b1 5467 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 5468 vec_oprnds3.pop ());
5469 }
5470
5471 if (!slp_node)
5472 {
5473 vec_oprnds0.quick_push (vec_cond_lhs);
5474 vec_oprnds1.quick_push (vec_cond_rhs);
5475 vec_oprnds2.quick_push (vec_then_clause);
5476 vec_oprnds3.quick_push (vec_else_clause);
5477 }
5478
9dc3f7de 5479 /* Arguments are ready. Create the new vector stmt. */
9771b263 5480 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 5481 {
5482 vec_cond_rhs = vec_oprnds1[i];
5483 vec_then_clause = vec_oprnds2[i];
5484 vec_else_clause = vec_oprnds3[i];
a855b1b1 5485
5486 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5487 vec_cond_lhs, vec_cond_rhs);
5488 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5489 vec_compare, vec_then_clause, vec_else_clause);
a855b1b1 5490
5491 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5492 new_temp = make_ssa_name (vec_dest, new_stmt);
5493 gimple_assign_set_lhs (new_stmt, new_temp);
5494 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5495 if (slp_node)
9771b263 5496 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5497 }
5498
5499 if (slp_node)
5500 continue;
5501
5502 if (j == 0)
5503 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5504 else
5505 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5506
5507 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 5508 }
b8698a0f 5509
5510 vec_oprnds0.release ();
5511 vec_oprnds1.release ();
5512 vec_oprnds2.release ();
5513 vec_oprnds3.release ();
f7e531cf 5514
5515 return true;
5516}
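/* Editorial example (not part of the original source): a scalar
   statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   is replaced by a vector statement of the form

     vx_5 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;

   where, for non-integral VECTYPE, the comparison is built in the
   same-width integer vector type VEC_CMP_TYPE computed above.  */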
5517
5518
8644a673 5519/* Make sure the statement is vectorizable. */
5520
5521bool
a70d6342 5522vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 5523{
8644a673 5524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 5525 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 5526 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 5527 bool ok;
a70d6342 5528 tree scalar_type, vectype;
5529 gimple pattern_stmt;
5530 gimple_seq pattern_def_seq;
ebfd146a 5531
73fbfcad 5532 if (dump_enabled_p ())
ebfd146a 5533 {
5534 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5535 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 5536 }
ebfd146a 5537
1825a1f3 5538 if (gimple_has_volatile_ops (stmt))
b8698a0f 5539 {
73fbfcad 5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5542 "not vectorized: stmt has volatile operands");
5543
5544 return false;
5545 }
5546
5547 /* Skip stmts that do not need to be vectorized. In loops this is expected
5548 to include:
5549 - the COND_EXPR which is the loop exit condition
5550 - any LABEL_EXPRs in the loop
b8698a0f 5551 - computations that are used only for array indexing or loop control.
8644a673 5552 In basic blocks we only analyze statements that are a part of some SLP
83197f37 5553 instance, therefore, all the statements are relevant.
ebfd146a 5554
d092494c 5555 A pattern statement needs to be analyzed instead of the original statement
83197f37 5556 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
5557 statements. In basic blocks we are called from some SLP instance
5558 traversal, don't analyze pattern stmts instead, the pattern stmts
5559 already will be part of SLP instance. */
83197f37
IR
5560
5561 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 5562 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 5563 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 5564 {
9d5e7640 5565 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 5566 && pattern_stmt
9d5e7640
IR
5567 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5568 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5569 {
83197f37 5570 /* Analyze PATTERN_STMT instead of the original stmt. */
5571 stmt = pattern_stmt;
5572 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 5573 if (dump_enabled_p ())
9d5e7640 5574 {
5575 dump_printf_loc (MSG_NOTE, vect_location,
5576 "==> examining pattern statement: ");
5577 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5578 }
5579 }
5580 else
5581 {
73fbfcad 5582 if (dump_enabled_p ())
78c60e3d 5583 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
ebfd146a 5584
5585 return true;
5586 }
8644a673 5587 }
83197f37 5588 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 5589 && node == NULL
5590 && pattern_stmt
5591 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5592 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5593 {
5594 /* Analyze PATTERN_STMT too. */
73fbfcad 5595 if (dump_enabled_p ())
83197f37 5596 {
5597 dump_printf_loc (MSG_NOTE, vect_location,
5598 "==> examining pattern statement: ");
5599 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5600 }
5601
5602 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5603 return false;
5604 }
ebfd146a 5605
1107f3ae 5606 if (is_pattern_stmt_p (stmt_info)
079c527f 5607 && node == NULL
363477c0 5608 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 5609 {
363477c0 5610 gimple_stmt_iterator si;
1107f3ae 5611
5612 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5613 {
5614 gimple pattern_def_stmt = gsi_stmt (si);
5615 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5616 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5617 {
5618 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 5619 if (dump_enabled_p ())
363477c0 5620 {
78c60e3d
SS
5621 dump_printf_loc (MSG_NOTE, vect_location,
5622 "==> examining pattern def statement: ");
5623 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 5624 }
1107f3ae 5625
363477c0
JJ
5626 if (!vect_analyze_stmt (pattern_def_stmt,
5627 need_to_vectorize, node))
5628 return false;
5629 }
5630 }
5631 }
1107f3ae 5632
5633 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5634 {
5635 case vect_internal_def:
5636 break;
ebfd146a 5637
8644a673 5638 case vect_reduction_def:
7c5222ff 5639 case vect_nested_cycle:
a70d6342 5640 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
8644a673 5641 || relevance == vect_used_in_outer_by_reduction
a70d6342 5642 || relevance == vect_unused_in_scope));
5643 break;
5644
5645 case vect_induction_def:
5646 case vect_constant_def:
5647 case vect_external_def:
5648 case vect_unknown_def_type:
5649 default:
5650 gcc_unreachable ();
5651 }
ebfd146a 5652
5653 if (bb_vinfo)
5654 {
5655 gcc_assert (PURE_SLP_STMT (stmt_info));
5656
b690cc0f 5657 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 5658 if (dump_enabled_p ())
a70d6342 5659 {
5660 dump_printf_loc (MSG_NOTE, vect_location,
5661 "get vectype for scalar type: ");
5662 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5663 }
5664
5665 vectype = get_vectype_for_scalar_type (scalar_type);
5666 if (!vectype)
5667 {
73fbfcad 5668 if (dump_enabled_p ())
a70d6342 5669 {
5670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5671 "not SLPed: unsupported data-type ");
5672 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5673 scalar_type);
5674 }
5675 return false;
5676 }
5677
73fbfcad 5678 if (dump_enabled_p ())
a70d6342 5679 {
5680 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5681 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5682 }
5683
5684 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5685 }
5686
8644a673 5687 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 5688 {
5689 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5690 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5691 *need_to_vectorize = true;
5692 }
5693
8644a673 5694 ok = true;
b8698a0f 5695 if (!bb_vinfo
5696 && (STMT_VINFO_RELEVANT_P (stmt_info)
5697 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4a00c761 5698 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
9dc3f7de 5699 || vectorizable_shift (stmt, NULL, NULL, NULL)
5700 || vectorizable_operation (stmt, NULL, NULL, NULL)
5701 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5702 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
190c2236 5703 || vectorizable_call (stmt, NULL, NULL, NULL)
8644a673 5704 || vectorizable_store (stmt, NULL, NULL, NULL)
b5aeb3bb 5705 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f7e531cf 5706 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5707 else
5708 {
5709 if (bb_vinfo)
5710 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5711 || vectorizable_shift (stmt, NULL, NULL, node)
9dc3f7de 5712 || vectorizable_operation (stmt, NULL, NULL, node)
5713 || vectorizable_assignment (stmt, NULL, NULL, node)
5714 || vectorizable_load (stmt, NULL, NULL, node, NULL)
190c2236 5715 || vectorizable_call (stmt, NULL, NULL, node)
5716 || vectorizable_store (stmt, NULL, NULL, node)
5717 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
b8698a0f 5718 }
5719
5720 if (!ok)
ebfd146a 5721 {
73fbfcad 5722 if (dump_enabled_p ())
8644a673 5723 {
5724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5725 "not vectorized: relevant stmt not ");
5726 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5728 }
b8698a0f 5729
5730 return false;
5731 }
5732
5733 if (bb_vinfo)
5734 return true;
5735
5736 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5737 need extra handling, except for vectorizable reductions. */
5738 if (STMT_VINFO_LIVE_P (stmt_info)
5739 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5740 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 5741
8644a673 5742 if (!ok)
ebfd146a 5743 {
73fbfcad 5744 if (dump_enabled_p ())
8644a673 5745 {
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5747 "not vectorized: live stmt not ");
5748 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5749 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 5750 }
b8698a0f 5751
8644a673 5752 return false;
5753 }
5754
5755 return true;
5756}
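/* Editorial sketch (not part of the original source) of the typical
   calling pattern for the function above; the analysis drivers iterate
   over statements roughly like this:

     bool need_to_vectorize = false;
     if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
       return false;

   NEED_TO_VECTORIZE is only ever set, never cleared, so after such a
   loop it records whether any relevant stmt was seen.  */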
5757
5758
5759/* Function vect_transform_stmt.
5760
 5761 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5762
5763bool
5764vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 5765 bool *grouped_store, slp_tree slp_node,
5766 slp_instance slp_node_instance)
5767{
5768 bool is_store = false;
5769 gimple vec_stmt = NULL;
5770 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 5771 bool done;
5772
5773 switch (STMT_VINFO_TYPE (stmt_info))
5774 {
5775 case type_demotion_vec_info_type:
ebfd146a 5776 case type_promotion_vec_info_type:
5777 case type_conversion_vec_info_type:
5778 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5779 gcc_assert (done);
5780 break;
5781
5782 case induc_vec_info_type:
5783 gcc_assert (!slp_node);
5784 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5785 gcc_assert (done);
5786 break;
5787
5788 case shift_vec_info_type:
5789 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5790 gcc_assert (done);
5791 break;
5792
5793 case op_vec_info_type:
5794 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5795 gcc_assert (done);
5796 break;
5797
5798 case assignment_vec_info_type:
5799 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5800 gcc_assert (done);
5801 break;
5802
5803 case load_vec_info_type:
b8698a0f 5804 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5805 slp_node_instance);
5806 gcc_assert (done);
5807 break;
5808
5809 case store_vec_info_type:
5810 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5811 gcc_assert (done);
0d0293ac 5812 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5813 {
5814 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 5815 last store in the chain is reached. Store stmts before the last
 5816 one are skipped, and their vec_stmt_info shouldn't be freed
5817 meanwhile. */
0d0293ac 5818 *grouped_store = true;
5819 if (STMT_VINFO_VEC_STMT (stmt_info))
5820 is_store = true;
5821 }
5822 else
5823 is_store = true;
5824 break;
5825
5826 case condition_vec_info_type:
f7e531cf 5827 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5828 gcc_assert (done);
5829 break;
5830
5831 case call_vec_info_type:
190c2236 5832 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 5833 stmt = gsi_stmt (*gsi);
5834 break;
5835
5836 case reduc_vec_info_type:
b5aeb3bb 5837 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5838 gcc_assert (done);
5839 break;
5840
5841 default:
5842 if (!STMT_VINFO_LIVE_P (stmt_info))
5843 {
73fbfcad 5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "stmt not supported.");
5847 gcc_unreachable ();
5848 }
5849 }
5850
5851 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5852 is being vectorized, but outside the immediately enclosing loop. */
5853 if (vec_stmt
5854 && STMT_VINFO_LOOP_VINFO (stmt_info)
5855 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5856 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5857 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5858 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 5859 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 5860 vect_used_in_outer_by_reduction))
ebfd146a 5861 {
5862 struct loop *innerloop = LOOP_VINFO_LOOP (
5863 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5864 imm_use_iterator imm_iter;
5865 use_operand_p use_p;
5866 tree scalar_dest;
5867 gimple exit_phi;
5868
73fbfcad 5869 if (dump_enabled_p ())
5870 dump_printf_loc (MSG_NOTE, vect_location,
5871 "Record the vdef for outer-loop vectorization.");
5872
 5873 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5874 (to be used when vectorizing outer-loop stmts that use the DEF of
5875 STMT). */
5876 if (gimple_code (stmt) == GIMPLE_PHI)
5877 scalar_dest = PHI_RESULT (stmt);
5878 else
5879 scalar_dest = gimple_assign_lhs (stmt);
5880
5881 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5882 {
5883 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5884 {
5885 exit_phi = USE_STMT (use_p);
5886 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5887 }
5888 }
5889 }
5890
5891 /* Handle stmts whose DEF is used outside the loop-nest that is
5892 being vectorized. */
5893 if (STMT_VINFO_LIVE_P (stmt_info)
5894 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5895 {
5896 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5897 gcc_assert (done);
5898 }
5899
5900 if (vec_stmt)
83197f37 5901 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 5902
b8698a0f 5903 return is_store;
5904}
5905
5906
b8698a0f 5907/* Remove a group of stores (for SLP or interleaving), free their
5908 stmt_vec_info. */
5909
5910void
5911vect_remove_stores (gimple first_stmt)
5912{
5913 gimple next = first_stmt;
5914 gimple tmp;
5915 gimple_stmt_iterator next_si;
5916
5917 while (next)
5918 {
5919 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5920
5921 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5922 if (is_pattern_stmt_p (stmt_info))
5923 next = STMT_VINFO_RELATED_STMT (stmt_info);
5924 /* Free the attached stmt_vec_info and remove the stmt. */
5925 next_si = gsi_for_stmt (next);
3d3f2249 5926 unlink_stmt_vdef (next);
ebfd146a 5927 gsi_remove (&next_si, true);
3d3f2249 5928 release_defs (next);
5929 free_stmt_vec_info (next);
5930 next = tmp;
5931 }
5932}
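/* Editorial note (not part of the original source): callers are
   expected to pass the head of the chain, e.g.

     vect_remove_stores (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));

   since the walk above follows GROUP_NEXT_ELEMENT from FIRST_STMT.  */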
5933
5934
5935/* Function new_stmt_vec_info.
5936
5937 Create and initialize a new stmt_vec_info struct for STMT. */
5938
5939stmt_vec_info
b8698a0f 5940new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 5941 bb_vec_info bb_vinfo)
5942{
5943 stmt_vec_info res;
5944 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5945
5946 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5947 STMT_VINFO_STMT (res) = stmt;
5948 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 5949 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 5950 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5951 STMT_VINFO_LIVE_P (res) = false;
5952 STMT_VINFO_VECTYPE (res) = NULL;
5953 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 5954 STMT_VINFO_VECTORIZABLE (res) = true;
5955 STMT_VINFO_IN_PATTERN_P (res) = false;
5956 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 5957 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5958 STMT_VINFO_DATA_REF (res) = NULL;
5959
5960 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5961 STMT_VINFO_DR_OFFSET (res) = NULL;
5962 STMT_VINFO_DR_INIT (res) = NULL;
5963 STMT_VINFO_DR_STEP (res) = NULL;
5964 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5965
5966 if (gimple_code (stmt) == GIMPLE_PHI
5967 && is_loop_header_bb_p (gimple_bb (stmt)))
5968 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5969 else
5970 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5971
9771b263 5972 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 5973 STMT_SLP_TYPE (res) = loop_vect;
5974 GROUP_FIRST_ELEMENT (res) = NULL;
5975 GROUP_NEXT_ELEMENT (res) = NULL;
5976 GROUP_SIZE (res) = 0;
5977 GROUP_STORE_COUNT (res) = 0;
5978 GROUP_GAP (res) = 0;
5979 GROUP_SAME_DR_STMT (res) = NULL;
5980 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5981
5982 return res;
5983}
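/* Editorial sketch (not part of the original source): a fresh
   stmt_vec_info is normally attached to its stmt right away, e.g.

     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));

   so that later vinfo_for_stmt (stmt) lookups succeed.  */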
5984
5985
5986/* Create a hash table for stmt_vec_info. */
5987
5988void
5989init_stmt_vec_info_vec (void)
5990{
5991 gcc_assert (!stmt_vec_info_vec.exists ());
5992 stmt_vec_info_vec.create (50);
5993}
5994
5995
5996/* Free hash table for stmt_vec_info. */
5997
5998void
5999free_stmt_vec_info_vec (void)
6000{
6001 gcc_assert (stmt_vec_info_vec.exists ());
6002 stmt_vec_info_vec.release ();
6003}
6004
6005
6006/* Free stmt vectorization related info. */
6007
6008void
6009free_stmt_vec_info (gimple stmt)
6010{
6011 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6012
6013 if (!stmt_info)
6014 return;
6015
6016 /* Check if this statement has a related "pattern stmt"
6017 (introduced by the vectorizer during the pattern recognition
6018 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6019 too. */
6020 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6021 {
6022 stmt_vec_info patt_info
6023 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6024 if (patt_info)
6025 {
6026 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6027 if (seq)
6028 {
6029 gimple_stmt_iterator si;
6030 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6031 free_stmt_vec_info (gsi_stmt (si));
6032 }
6033 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6034 }
6035 }
6036
9771b263 6037 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6038 set_vinfo_for_stmt (stmt, NULL);
6039 free (stmt_info);
6040}
6041
6042
bb67d9c7 6043/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 6044
bb67d9c7 6045 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6046 by the target. */
6047
6048static tree
6049get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6050{
6051 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 6052 enum machine_mode simd_mode;
2f816591 6053 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6054 int nunits;
6055 tree vectype;
6056
cc4b5170 6057 if (nbytes == 0)
6058 return NULL_TREE;
6059
6060 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6061 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6062 return NULL_TREE;
6063
6064 /* For vector types of elements whose mode precision doesn't
 6065 match their type's precision we use an element type of mode
6066 precision. The vectorization routines will have to make sure
6067 they support the proper result truncation/extension.
6068 We also make sure to build vector types with INTEGER_TYPE
6069 component type only. */
6d7971b8 6070 if (INTEGRAL_TYPE_P (scalar_type)
6071 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6072 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6073 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6074 TYPE_UNSIGNED (scalar_type));
6d7971b8 6075
6076 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6077 When the component mode passes the above test simply use a type
6078 corresponding to that mode. The theory is that any use that
6079 would cause problems with this will disable vectorization anyway. */
6080 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6081 && !INTEGRAL_TYPE_P (scalar_type)
6082 && !POINTER_TYPE_P (scalar_type))
6083 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6084
6085 /* We can't build a vector type of elements with alignment bigger than
6086 their size. */
dfc2e2ac 6087 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6088 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6089
 6090 /* If we fell back to using the mode, fail if there was
6091 no scalar type for it. */
6092 if (scalar_type == NULL_TREE)
6093 return NULL_TREE;
6094
6095 /* If no size was supplied use the mode the target prefers. Otherwise
6096 lookup a vector mode of the specified size. */
6097 if (size == 0)
6098 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6099 else
6100 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6101 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6102 if (nunits <= 1)
6103 return NULL_TREE;
6104
6105 vectype = build_vector_type (scalar_type, nunits);
73fbfcad 6106 if (dump_enabled_p ())
ebfd146a 6107 {
6108 dump_printf_loc (MSG_NOTE, vect_location,
6109 "get vectype with %d units of type ", nunits);
6110 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6111 }
6112
6113 if (!vectype)
6114 return NULL_TREE;
6115
73fbfcad 6116 if (dump_enabled_p ())
ebfd146a 6117 {
6118 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6119 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6120 }
6121
6122 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6123 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6124 {
73fbfcad 6125 if (dump_enabled_p ())
6126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6127 "mode not supported by target.");
6128 return NULL_TREE;
6129 }
6130
6131 return vectype;
6132}
6133
6134unsigned int current_vector_size;
6135
6136/* Function get_vectype_for_scalar_type.
6137
6138 Returns the vector type corresponding to SCALAR_TYPE as supported
6139 by the target. */
6140
6141tree
6142get_vectype_for_scalar_type (tree scalar_type)
6143{
6144 tree vectype;
6145 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6146 current_vector_size);
6147 if (vectype
6148 && current_vector_size == 0)
6149 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6150 return vectype;
6151}
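/* Editorial example (not part of the original source): on a target
   whose preferred SIMD mode is 128 bits wide, a first call such as

     tree v4si = get_vectype_for_scalar_type (integer_type_node);

   returns a vector of four 32-bit ints (assuming a 4-byte int) and
   latches current_vector_size to 16, so subsequent calls pick vector
   types of the same size.  */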
6152
6153/* Function get_same_sized_vectype
6154
6155 Returns a vector type corresponding to SCALAR_TYPE of size
6156 VECTOR_TYPE if supported by the target. */
6157
6158tree
bb67d9c7 6159get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 6160{
6161 return get_vectype_for_scalar_type_and_size
6162 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6163}
6164
6165/* Function vect_is_simple_use.
6166
6167 Input:
6168 LOOP_VINFO - the vect info of the loop that is being vectorized.
6169 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 6170 OPERAND - operand of STMT in the loop or bb.
6171 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6172
6173 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 6174 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 6175 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 6176 operands are those that are defined by a previous iteration of the loop (as
6177 is the case in reduction/induction computations).
6178 For basic blocks, supportable operands are constants and bb invariants.
6179 For now, operands defined outside the basic block are not supported. */
6180
6181bool
24ee1384 6182vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 6183 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 6184 tree *def, enum vect_def_type *dt)
b8698a0f 6185{
6186 basic_block bb;
6187 stmt_vec_info stmt_vinfo;
a70d6342 6188 struct loop *loop = NULL;
b8698a0f 6189
6190 if (loop_vinfo)
6191 loop = LOOP_VINFO_LOOP (loop_vinfo);
6192
6193 *def_stmt = NULL;
6194 *def = NULL_TREE;
b8698a0f 6195
73fbfcad 6196 if (dump_enabled_p ())
ebfd146a 6197 {
6198 dump_printf_loc (MSG_NOTE, vect_location,
6199 "vect_is_simple_use: operand ");
6200 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 6201 }
b8698a0f 6202
b758f602 6203 if (CONSTANT_CLASS_P (operand))
6204 {
6205 *dt = vect_constant_def;
6206 return true;
6207 }
b8698a0f 6208
6209 if (is_gimple_min_invariant (operand))
6210 {
6211 *def = operand;
8644a673 6212 *dt = vect_external_def;
ebfd146a
IR
6213 return true;
6214 }
6215
6216 if (TREE_CODE (operand) == PAREN_EXPR)
6217 {
73fbfcad 6218 if (dump_enabled_p ())
78c60e3d 6219 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6220 operand = TREE_OPERAND (operand, 0);
6221 }
b8698a0f 6222
6223 if (TREE_CODE (operand) != SSA_NAME)
6224 {
73fbfcad 6225 if (dump_enabled_p ())
6226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6227 "not ssa-name.");
6228 return false;
6229 }
b8698a0f 6230
6231 *def_stmt = SSA_NAME_DEF_STMT (operand);
6232 if (*def_stmt == NULL)
6233 {
73fbfcad 6234 if (dump_enabled_p ())
6235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6236 "no def_stmt.");
6237 return false;
6238 }
6239
73fbfcad 6240 if (dump_enabled_p ())
ebfd146a 6241 {
6242 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6243 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6244 }
6245
8644a673 6246 /* Empty stmt is expected only in case of a function argument.
6247 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6248 if (gimple_nop_p (*def_stmt))
6249 {
6250 *def = operand;
8644a673 6251 *dt = vect_external_def;
6252 return true;
6253 }
6254
6255 bb = gimple_bb (*def_stmt);
6256
6257 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6258 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
b8698a0f 6259 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
8644a673 6260 *dt = vect_external_def;
6261 else
6262 {
6263 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6264 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6265 }
6266
6267 if (*dt == vect_unknown_def_type
6268 || (stmt
6269 && *dt == vect_double_reduction_def
6270 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 6271 {
73fbfcad 6272 if (dump_enabled_p ())
6273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6274 "Unsupported pattern.");
6275 return false;
6276 }
6277
73fbfcad 6278 if (dump_enabled_p ())
78c60e3d 6279 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6280
6281 switch (gimple_code (*def_stmt))
6282 {
6283 case GIMPLE_PHI:
6284 *def = gimple_phi_result (*def_stmt);
6285 break;
6286
6287 case GIMPLE_ASSIGN:
6288 *def = gimple_assign_lhs (*def_stmt);
6289 break;
6290
6291 case GIMPLE_CALL:
6292 *def = gimple_call_lhs (*def_stmt);
6293 if (*def != NULL)
6294 break;
6295 /* FALLTHRU */
6296 default:
73fbfcad 6297 if (dump_enabled_p ())
6298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6299 "unsupported defining stmt: ");
6300 return false;
6301 }
6302
6303 return true;
6304}
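/* Editorial sketch (not part of the original source), mirroring the
   call sites earlier in this file:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt))
       return false;  /* OP cannot be handled.  */

   On success DT classifies the definition (constant, external,
   internal, reduction, ...) for the caller.  */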
6305
6306/* Function vect_is_simple_use_1.
6307
 6308 Same as vect_is_simple_use but also determines the vector operand
6309 type of OPERAND and stores it to *VECTYPE. If the definition of
6310 OPERAND is vect_uninitialized_def, vect_constant_def or
 6311 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
 6312 is responsible for computing the best suited vector type for the
6313 scalar operand. */
6314
6315bool
24ee1384 6316vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6317 bb_vec_info bb_vinfo, gimple *def_stmt,
6318 tree *def, enum vect_def_type *dt, tree *vectype)
6319{
6320 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6321 def, dt))
6322 return false;
6323
6324 /* Now get a vector type if the def is internal, otherwise supply
6325 NULL_TREE and leave it up to the caller to figure out a proper
6326 type for the use stmt. */
6327 if (*dt == vect_internal_def
6328 || *dt == vect_induction_def
6329 || *dt == vect_reduction_def
6330 || *dt == vect_double_reduction_def
6331 || *dt == vect_nested_cycle)
6332 {
6333 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6334
6335 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6336 && !STMT_VINFO_RELEVANT (stmt_info)
6337 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 6338 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 6339
6340 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6341 gcc_assert (*vectype != NULL_TREE);
6342 }
6343 else if (*dt == vect_uninitialized_def
6344 || *dt == vect_constant_def
6345 || *dt == vect_external_def)
6346 *vectype = NULL_TREE;
6347 else
6348 gcc_unreachable ();
6349
6350 return true;
6351}
6352
6353
6354/* Function supportable_widening_operation
6355
6356 Check whether an operation represented by the code CODE is a
6357 widening operation that is supported by the target platform in
6358 vector form (i.e., when operating on arguments of type VECTYPE_IN
6359 producing a result of type VECTYPE_OUT).
b8698a0f 6360
 6361 Widening operations we currently support are NOP (CONVERT), FLOAT,
 6362 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
6363 by the target platform either directly (via vector tree-codes), or via
6364 target builtins.
6365
6366 Output:
6367 - CODE1 and CODE2 are codes of vector operations to be used when
6368 vectorizing the operation, if available.
6369 - MULTI_STEP_CVT determines the number of required intermediate steps in
6370 case of multi-step conversion (like char->short->int - in that case
6371 MULTI_STEP_CVT will be 1).
6372 - INTERM_TYPES contains the intermediate type required to perform the
6373 widening operation (short in the above example). */
6374
6375bool
6376supportable_widening_operation (enum tree_code code, gimple stmt,
6377 tree vectype_out, tree vectype_in,
6378 enum tree_code *code1, enum tree_code *code2,
6379 int *multi_step_cvt,
9771b263 6380 vec<tree> *interm_types)
6381{
6382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6383 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 6384 struct loop *vect_loop = NULL;
ebfd146a 6385 enum machine_mode vec_mode;
81f40b79 6386 enum insn_code icode1, icode2;
ebfd146a 6387 optab optab1, optab2;
6388 tree vectype = vectype_in;
6389 tree wide_vectype = vectype_out;
ebfd146a 6390 enum tree_code c1, c2;
6391 int i;
6392 tree prev_type, intermediate_type;
6393 enum machine_mode intermediate_mode, prev_mode;
6394 optab optab3, optab4;
ebfd146a 6395
4a00c761 6396 *multi_step_cvt = 0;
6397 if (loop_info)
6398 vect_loop = LOOP_VINFO_LOOP (loop_info);
6399
6400 switch (code)
6401 {
6402 case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

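  /* On big-endian targets the "high" half of the vector holds the
     lower-numbered scalar elements, so the LO/HI codes are swapped to
     keep the results in scalar order; the even/odd variant is
     insensitive to element order and needs no swap.  */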
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
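  /* For example, a V16QI -> V4SI conversion needs one intermediate step:
     V16QI -> V8HI -> V4SI, so *MULTI_STEP_CVT becomes 1 and INTERM_TYPES
     receives the V8HI vector type.  */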
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

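      /* Give up unless the previous-step optabs take PREV_MODE to
	 INTERMEDIATE_MODE and the intermediate-type optabs are themselves
	 implemented.  Note that ICODE1/ICODE2 end up holding the codes for
	 the intermediate step, which the success test below relies on.  */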
      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate types required to perform the
   narrowing operation (short in the above example).  */
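
/* A minimal usage sketch (hypothetical variables, not from this file),
   assuming an int -> char conversion with V4SI input and V16QI output:

     enum tree_code code1;
     int steps;
     vec<tree> itypes = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, v16qi_vectype,
					  v4si_vectype, &code1,
					  &steps, &itypes))
       {
	 ... on success STEPS == 1, ITYPES holds the V8HI vector type,
	 and CODE1 is VEC_PACK_TRUNC_EXPR ...
       }

   V16QI_VECTYPE and V4SI_VECTYPE stand for the caller's output and
   input vector types.  */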

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
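  /* For example, a multi-step double -> unsigned short conversion can
     switch to the signed VEC_PACK_FIX_TRUNC_EXPR optab when it produces
     the same result mode; the remaining narrowing steps are plain
     integer packs, for which signedness does not matter.  */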
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
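  /* For example, an int -> char conversion needs one intermediate step:
     V4SI -> V8HI -> V16QI, so *MULTI_STEP_CVT becomes 1 and INTERM_TYPES
     receives the V8HI vector type.  */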
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}