/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "expr.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "cgraph.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
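
/* Illustrative sketch (not part of the original source): how a caller
   typically uses record_stmt_cost during analysis.  With a non-NULL cost
   vector the cost is queued for later processing; with a NULL vector it
   goes straight to the target's cost hook.  The function and variable
   names below are hypothetical.  */
#if 0
static unsigned
example_cost_usage (stmt_vec_info stmt_info, int ncopies,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec)
{
  /* One vector statement in the prologue (e.g. an invariant broadcast).  */
  unsigned prologue_cost
    = record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
			stmt_info, 0, vect_prologue);
  /* NCOPIES vector statements in the loop body.  */
  unsigned body_cost
    = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
			stmt_info, 0, vect_body);
  return prologue_cost + body_cost;
}
#endif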

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
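
/* Illustrative sketch (not part of the original source): the intended
   read/write pattern for the helpers above, as used when a grouped access
   is implemented with load/store-lanes.  VECTYPE, NELEMS, VEC, STMT, GSI
   and SCALAR_DEST are assumed to be set up by the caller.  */
#if 0
  /* An array of NELEMS vectors ...  */
  tree array = create_vector_array (vectype, nelems);
  /* ... each element written and read back by index.  */
  write_vector_array (stmt, gsi, vec, array, 0);
  tree vec0 = read_vector_array (stmt, gsi, scalar_dest, array, 0);
#endif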

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is out of pattern use; if LHS has other uses that are
	     pattern uses, we should mark the stmt itself, and not the pattern
	     stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
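
/* Illustration (added comment, not in the original source): given the
   store "a[i_1] = x_2", the use of i_1 only indexes the array, so
   exist_non_indexing_operands_for_use_p (i_1, stmt) is false and the def
   of i_1 need not be vectorized; for the stored value x_2 it is true.
   For a load "T1_3 = a[i_1]" the LHS is an SSA_NAME, so the function
   returns false for every use: all operands are indexing-only.  */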


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling vect_mark_relevant and passing
     it the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	     || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  stack_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
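
/* Worked illustration (added comment, not in the original source), using
   the example from the comment above: a store consuming T1 would be seeded
   as relevant by vect_stmt_relevant_p (it has a vdef), and relevance then
   propagates backwards through process_use.  The use of T0 in stmt 2 is
   index-only, so exist_non_indexing_operands_for_use_p returns false for
   it and stmt 1 is never marked relevant; stmt 3, whose only consumer in
   this fragment is stmt 1, is likewise never pushed on the worklist.  */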


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
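
/* Worked example (added comment, not in the original source), assuming
   vect_pow2 (N) returns 2**N: for a two-step promotion (PWR == 1) the loop
   above charges vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   stmts, while the corresponding two-step demotion charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, matching the doubling per
   additional step described in the comment above.  */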

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */

      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
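
/* Worked example (added comment, not in the original source): for an
   interleaved store group with GROUP_SIZE == 4 and NCOPIES == 1 that is
   not using store-lanes, the formula above charges
   1 * exact_log2 (4) * 4 = 8 vec_perm stmts on top of the stores
   themselves.  */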


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
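
/* Illustrative sketch (not part of the original source): materializing a
   vector of a scalar constant in the loop preheader.  VECTYPE and STMT are
   assumed to come from the caller's stmt_vec_info.  */
#if 0
  /* Build {17, 17, ..., 17} of type VECTYPE; a NULL GSI places the
     init stmt in the preheader rather than at a specific point.  */
  tree cst = build_int_cst (TREE_TYPE (vectype), 17);
  tree vec_cst = vect_init_vector (stmt, cst, vectype, NULL);
#endif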
1337
a70d6342 1338
ebfd146a
IR
1339/* Function vect_get_vec_def_for_operand.
1340
ff802fa1 1341 OP is an operand in STMT. This function returns a (vector) def that will be
ebfd146a
IR
1342 used in the vectorized stmt for STMT.
1343
1344 In the case that OP is an SSA_NAME which is defined in the loop, then
1345 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1346
1347 In case OP is an invariant or constant, a new stmt that creates a vector def
1348 needs to be introduced. */
1349
1350tree
1351vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1352{
1353 tree vec_oprnd;
1354 gimple vec_stmt;
1355 gimple def_stmt;
1356 stmt_vec_info def_stmt_info = NULL;
1357 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
9dc3f7de 1358 unsigned int nunits;
ebfd146a 1359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
ebfd146a 1360 tree def;
ebfd146a
IR
1361 enum vect_def_type dt;
1362 bool is_simple_use;
1363 tree vector_type;
1364
73fbfcad 1365 if (dump_enabled_p ())
ebfd146a 1366 {
78c60e3d
SS
1367 dump_printf_loc (MSG_NOTE, vect_location,
1368 "vect_get_vec_def_for_operand: ");
1369 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
e645e942 1370 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1371 }
1372
24ee1384
IR
1373 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1374 &def_stmt, &def, &dt);
ebfd146a 1375 gcc_assert (is_simple_use);
73fbfcad 1376 if (dump_enabled_p ())
ebfd146a 1377 {
78c60e3d 1378 int loc_printed = 0;
ebfd146a
IR
1379 if (def)
1380 {
78c60e3d
SS
1381 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1382 loc_printed = 1;
1383 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
e645e942 1384 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1385 }
1386 if (def_stmt)
1387 {
78c60e3d
SS
1388 if (loc_printed)
1389 dump_printf (MSG_NOTE, " def_stmt = ");
1390 else
1391 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
e645e942 1393 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1394 }
1395 }
1396
1397 switch (dt)
1398 {
1399 /* Case 1: operand is a constant. */
1400 case vect_constant_def:
1401 {
7569a6cc
RG
1402 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1403 gcc_assert (vector_type);
9dc3f7de 1404 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
7569a6cc 1405
b8698a0f 1406 if (scalar_def)
ebfd146a
IR
1407 *scalar_def = op;
1408
1409 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
73fbfcad 1410 if (dump_enabled_p ())
78c60e3d 1411 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1412 "Create vector_cst. nunits = %d\n", nunits);
ebfd146a 1413
418b7df3 1414 return vect_init_vector (stmt, op, vector_type, NULL);
ebfd146a
IR
1415 }
1416
1417 /* Case 2: operand is defined outside the loop - loop invariant. */
8644a673 1418 case vect_external_def:
ebfd146a
IR
1419 {
1420 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1421 gcc_assert (vector_type);
ebfd146a 1422
b8698a0f 1423 if (scalar_def)
ebfd146a
IR
1424 *scalar_def = def;
1425
1426 /* Create 'vec_inv = {inv,inv,..,inv}' */
73fbfcad 1427 if (dump_enabled_p ())
e645e942 1428 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
ebfd146a 1429
418b7df3 1430 return vect_init_vector (stmt, def, vector_type, NULL);
ebfd146a
IR
1431 }
1432
1433 /* Case 3: operand is defined inside the loop. */
8644a673 1434 case vect_internal_def:
ebfd146a 1435 {
b8698a0f 1436 if (scalar_def)
ebfd146a
IR
1437 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1438
1439 /* Get the def from the vectorized stmt. */
1440 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1441
ebfd146a 1442 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1443 /* Get vectorized pattern statement. */
1444 if (!vec_stmt
1445 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1446 && !STMT_VINFO_RELEVANT (def_stmt_info))
1447 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1448 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1449 gcc_assert (vec_stmt);
1450 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1451 vec_oprnd = PHI_RESULT (vec_stmt);
1452 else if (is_gimple_call (vec_stmt))
1453 vec_oprnd = gimple_call_lhs (vec_stmt);
1454 else
1455 vec_oprnd = gimple_assign_lhs (vec_stmt);
1456 return vec_oprnd;
1457 }
1458
1459 /* Case 4: operand is defined by a loop header phi - reduction */
1460 case vect_reduction_def:
06066f92 1461 case vect_double_reduction_def:
7c5222ff 1462 case vect_nested_cycle:
ebfd146a
IR
1463 {
1464 struct loop *loop;
1465
1466 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
b8698a0f 1467 loop = (gimple_bb (def_stmt))->loop_father;
ebfd146a
IR
1468
1469 /* Get the def before the loop */
1470 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1471 return get_initial_def_for_reduction (stmt, op, scalar_def);
1472 }
1473
1474 /* Case 5: operand is defined by loop-header phi - induction. */
1475 case vect_induction_def:
1476 {
1477 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1478
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info = vinfo_for_stmt (def_stmt);
1481 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1482 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1483 vec_oprnd = PHI_RESULT (vec_stmt);
1484 else
1485 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1486 return vec_oprnd;
1487 }
1488
1489 default:
1490 gcc_unreachable ();
1491 }
1492}
1493
1494
1495/* Function vect_get_vec_def_for_stmt_copy
1496
ff802fa1 1497 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1498 vectorized stmt to be created (by the caller to this function) is a "copy"
1499 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1500 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1501 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1502 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1503 DT is the type of the vector def VEC_OPRND.
1504
1505 Context:
1506 In case the vectorization factor (VF) is bigger than the number
1507 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1508 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1509 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1510 smallest data-type determines the VF, and as a result, when vectorizing
1511 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1512 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1513 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1514 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1515 which VF=16 and nunits=4, so the number of copies required is 4):
1516
1517 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1518
ebfd146a
IR
1519 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1520 VS1.1: vx.1 = memref1 VS1.2
1521 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1522 VS1.3: vx.3 = memref3
ebfd146a
IR
1523
1524 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1525 VSnew.1: vz1 = vx.1 + ... VSnew.2
1526 VSnew.2: vz2 = vx.2 + ... VSnew.3
1527 VSnew.3: vz3 = vx.3 + ...
1528
1529 The vectorization of S1 is explained in vectorizable_load.
1530 The vectorization of S2:
b8698a0f
L
1531 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1532 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1533 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1534 returns the vector-def 'vx.0'.
1535
b8698a0f
L
1536 To create the remaining copies of the vector-stmt (VSnew.j), this
1537 function is called to get the relevant vector-def for each operand. It is
1538 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1539 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1540
b8698a0f
L
1541 For example, to obtain the vector-def 'vx.1' in order to create the
1542 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1543 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1544 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1545 and return its def ('vx.1').
1546 Overall, to create the above sequence this function will be called 3 times:
1547 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1548 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1549 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1550
1551tree
1552vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1553{
1554 gimple vec_stmt_for_operand;
1555 stmt_vec_info def_stmt_info;
1556
1557 /* Do nothing; can reuse same def. */
8644a673 1558 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1559 return vec_oprnd;
1560
1561 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1562 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1563 gcc_assert (def_stmt_info);
1564 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1565 gcc_assert (vec_stmt_for_operand);
1566 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1567 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1568 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1569 else
1570 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1571 return vec_oprnd;
1572}
1573
1574
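/* Illustrative usage sketch (not part of the original file; OP, STMT, DT
   and NCOPIES are assumed to come from a caller such as a vectorizable_*
   routine).  The chain walk described above looks like:  */
#if 0
  tree vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
  /* vec_oprnd now holds vx.0; each further call follows the
     STMT_VINFO_RELATED_STMT chain and yields vx.1, vx.2, ...  */
  for (int copy = 1; copy < ncopies; copy++)
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
#endif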
1575/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1576 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a
IR
1577
1578static void
b8698a0f 1579vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1580 vec<tree> *vec_oprnds0,
1581 vec<tree> *vec_oprnds1)
ebfd146a 1582{
9771b263 1583 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1584
1585 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1586 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1587
9771b263 1588 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1589 {
9771b263 1590 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1591 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1592 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1593 }
1594}
1595
1596
d092494c
IR
1597/* Get vectorized definitions for OP0 and OP1.
 1598 REDUC_INDEX is the index of the reduction operand in case of reduction,
1599 and -1 otherwise. */
ebfd146a 1600
d092494c 1601void
ebfd146a 1602vect_get_vec_defs (tree op0, tree op1, gimple stmt,
9771b263
DN
1603 vec<tree> *vec_oprnds0,
1604 vec<tree> *vec_oprnds1,
d092494c 1605 slp_tree slp_node, int reduc_index)
ebfd146a
IR
1606{
1607 if (slp_node)
d092494c
IR
1608 {
1609 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1610 auto_vec<tree> ops (nops);
1611 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1612
9771b263 1613 ops.quick_push (op0);
d092494c 1614 if (op1)
9771b263 1615 ops.quick_push (op1);
d092494c
IR
1616
1617 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1618
37b5ec8f 1619 *vec_oprnds0 = vec_defs[0];
d092494c 1620 if (op1)
37b5ec8f 1621 *vec_oprnds1 = vec_defs[1];
d092494c 1622 }
ebfd146a
IR
1623 else
1624 {
1625 tree vec_oprnd;
1626
9771b263 1627 vec_oprnds0->create (1);
b8698a0f 1628 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
9771b263 1629 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1630
1631 if (op1)
1632 {
9771b263 1633 vec_oprnds1->create (1);
b8698a0f 1634 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
9771b263 1635 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1636 }
1637 }
1638}
1639
1640
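/* Usage sketch (assumed caller context, not from the original file): the
   loop-based path below yields one-element vectors, later grown for the
   remaining copies by vect_get_vec_defs_for_stmt_copy.  */
#if 0
  vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
  /* First copy: slp_node == NULL, no reduction operand.  */
  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, NULL, -1);
  /* Subsequent copies update the same vectors in place.  */
  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
#endif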
1641/* Function vect_finish_stmt_generation.
1642
1643 Insert a new stmt. */
1644
1645void
1646vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1647 gimple_stmt_iterator *gsi)
1648{
1649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 1651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a
IR
1652
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654
54e8e2c3
RG
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1657 {
1658 gimple at_stmt = gsi_stmt (*gsi);
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 {
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 {
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 }
1679 }
1680 }
ebfd146a
IR
1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682
b8698a0f 1683 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
a70d6342 1684 bb_vinfo));
ebfd146a 1685
73fbfcad 1686 if (dump_enabled_p ())
ebfd146a 1687 {
78c60e3d
SS
1688 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
e645e942 1690 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1691 }
1692
ad885386 1693 gimple_set_location (vec_stmt, gimple_location (stmt));
ebfd146a
IR
1694}
1695
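/* Example of the virtual-operand fixup above (illustrative GIMPLE, made-up
   SSA names): inserting a vectorized store before the scalar store it
   replaces rewrites

     # .MEM_4 = VDEF <.MEM_3>          # .MEM_5 = VDEF <.MEM_3>
     *p_2 = x_1;                 ==>   MEM[vptr_6] = vx_7;
                                       # .MEM_4 = VDEF <.MEM_5>
                                       *p_2 = x_1;

   so the SSA renamer need not be run over the function.  */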
 1696/* Checks if CALL can be vectorized with result type VECTYPE_OUT and
 1697 argument type VECTYPE_IN. Returns a function declaration if the target
 1698 has a vectorized version of the function, or NULL_TREE otherwise. */
1699
1700tree
1701vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1702{
1703 tree fndecl = gimple_call_fndecl (call);
ebfd146a
IR
1704
1705 /* We only handle functions that do not read or clobber memory -- i.e.
1706 const or novops ones. */
1707 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1708 return NULL_TREE;
1709
1710 if (!fndecl
1711 || TREE_CODE (fndecl) != FUNCTION_DECL
1712 || !DECL_BUILT_IN (fndecl))
1713 return NULL_TREE;
1714
62f7fd21 1715 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
ebfd146a
IR
1716 vectype_in);
1717}
1718
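/* Sketch of the target hook contract used above (a simplified,
   hypothetical target; the returned decl is an assumption, not a real
   builtin of any particular backend):  */
#if 0
static tree
example_builtin_vectorized_function (tree fndecl, tree vectype_out,
				     tree vectype_in)
{
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SQRT
      && TYPE_MODE (vectype_out) == V2DFmode
      && TYPE_MODE (vectype_in) == V2DFmode)
    return example_v2df_sqrt_decl;	/* hypothetical builtin decl */
  return NULL_TREE;			/* not vectorizable on this target */
}
#endif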
5ce9450f
JJ
1719
1720static tree permute_vec_elements (tree, tree, tree, gimple,
1721 gimple_stmt_iterator *);
1722
1723
1724/* Function vectorizable_mask_load_store.
1725
1726 Check if STMT performs a conditional load or store that can be vectorized.
1727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1728 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1730
1731static bool
1732vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1733 gimple *vec_stmt, slp_tree slp_node)
1734{
1735 tree vec_dest = NULL;
1736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1737 stmt_vec_info prev_stmt_info;
1738 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1739 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1740 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1741 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1742 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1743 tree elem_type;
1744 gimple new_stmt;
1745 tree dummy;
1746 tree dataref_ptr = NULL_TREE;
1747 gimple ptr_incr;
1748 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1749 int ncopies;
1750 int i, j;
1751 bool inv_p;
1752 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1753 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1754 int gather_scale = 1;
1755 enum vect_def_type gather_dt = vect_unknown_def_type;
1756 bool is_store;
1757 tree mask;
1758 gimple def_stmt;
1759 tree def;
1760 enum vect_def_type dt;
1761
1762 if (slp_node != NULL)
1763 return false;
1764
1765 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1766 gcc_assert (ncopies >= 1);
1767
1768 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1769 mask = gimple_call_arg (stmt, 2);
1770 if (TYPE_PRECISION (TREE_TYPE (mask))
1771 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1772 return false;
1773
1774 /* FORNOW. This restriction should be relaxed. */
1775 if (nested_in_vect_loop && ncopies > 1)
1776 {
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "multiple types in nested loop.");
1780 return false;
1781 }
1782
1783 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1784 return false;
1785
1786 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1787 return false;
1788
1789 if (!STMT_VINFO_DATA_REF (stmt_info))
1790 return false;
1791
1792 elem_type = TREE_TYPE (vectype);
1793
1794 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1795 return false;
1796
1797 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1798 return false;
1799
1800 if (STMT_VINFO_GATHER_P (stmt_info))
1801 {
1802 gimple def_stmt;
1803 tree def;
1804 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1805 &gather_off, &gather_scale);
1806 gcc_assert (gather_decl);
1807 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1808 &def_stmt, &def, &gather_dt,
1809 &gather_off_vectype))
1810 {
1811 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 "gather index use not simple.");
1814 return false;
1815 }
1816 }
1817 else if (tree_int_cst_compare (nested_in_vect_loop
1818 ? STMT_VINFO_DR_STEP (stmt_info)
1819 : DR_STEP (dr), size_zero_node) <= 0)
1820 return false;
1821 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1822 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1823 return false;
1824
1825 if (TREE_CODE (mask) != SSA_NAME)
1826 return false;
1827
1828 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1829 &def_stmt, &def, &dt))
1830 return false;
1831
1832 if (is_store)
1833 {
1834 tree rhs = gimple_call_arg (stmt, 3);
1835 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1836 &def_stmt, &def, &dt))
1837 return false;
1838 }
1839
1840 if (!vec_stmt) /* transformation not required. */
1841 {
1842 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1843 if (is_store)
1844 vect_model_store_cost (stmt_info, ncopies, false, dt,
1845 NULL, NULL, NULL);
1846 else
1847 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1848 return true;
1849 }
1850
1851 /** Transform. **/
1852
1853 if (STMT_VINFO_GATHER_P (stmt_info))
1854 {
1855 tree vec_oprnd0 = NULL_TREE, op;
1856 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1857 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1858 tree ptr, vec_mask = NULL_TREE, mask_op, var, scale;
1859 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1860 edge pe = loop_preheader_edge (loop);
1861 gimple_seq seq;
1862 basic_block new_bb;
1863 enum { NARROW, NONE, WIDEN } modifier;
1864 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1865
1866 if (nunits == gather_off_nunits)
1867 modifier = NONE;
1868 else if (nunits == gather_off_nunits / 2)
1869 {
1870 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1871 modifier = WIDEN;
1872
1873 for (i = 0; i < gather_off_nunits; ++i)
1874 sel[i] = i | nunits;
1875
1876 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1877 gcc_assert (perm_mask != NULL_TREE);
1878 }
1879 else if (nunits == gather_off_nunits * 2)
1880 {
1881 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1882 modifier = NARROW;
1883
1884 for (i = 0; i < nunits; ++i)
1885 sel[i] = i < gather_off_nunits
1886 ? i : i + nunits - gather_off_nunits;
1887
1888 perm_mask = vect_gen_perm_mask (vectype, sel);
1889 gcc_assert (perm_mask != NULL_TREE);
1890 ncopies *= 2;
1891 }
1892 else
1893 gcc_unreachable ();
1894
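/* Worked example of the selectors built above (values follow directly
   from the loops):
     WIDEN,  nunits = 4, gather_off_nunits = 8:
       sel = { 4, 5, 6, 7, 4, 5, 6, 7 }  -- brings the upper half of the
       offset vector into the low positions for the odd-numbered copies;
     NARROW, nunits = 8, gather_off_nunits = 4:
       sel = { 0, 1, 2, 3, 8, 9, 10, 11 }  -- glues the two half-width
       gather results PREV_RES and VAR into one destination vector.  */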
1895 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1896 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1897 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1898 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1899 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1900 scaletype = TREE_VALUE (arglist);
1901 gcc_checking_assert (types_compatible_p (srctype, rettype)
1902 && types_compatible_p (srctype, masktype));
1903
1904 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1905
1906 ptr = fold_convert (ptrtype, gather_base);
1907 if (!is_gimple_min_invariant (ptr))
1908 {
1909 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1910 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1911 gcc_assert (!new_bb);
1912 }
1913
1914 scale = build_int_cst (scaletype, gather_scale);
1915
1916 prev_stmt_info = NULL;
1917 for (j = 0; j < ncopies; ++j)
1918 {
1919 if (modifier == WIDEN && (j & 1))
1920 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1921 perm_mask, stmt, gsi);
1922 else if (j == 0)
1923 op = vec_oprnd0
1924 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1925 else
1926 op = vec_oprnd0
1927 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1928
1929 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1930 {
1931 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1932 == TYPE_VECTOR_SUBPARTS (idxtype));
1933 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1934 var = make_ssa_name (var, NULL);
1935 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1936 new_stmt
1937 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1938 op, NULL_TREE);
1939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1940 op = var;
1941 }
1942
1943 if (j == 0)
1944 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1945 else
1946 {
1947 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
1948 &def, &dt);
1949 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1950 }
1951
1952 mask_op = vec_mask;
1953 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1954 {
1955 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1956 == TYPE_VECTOR_SUBPARTS (masktype));
1957 var = vect_get_new_vect_var (masktype, vect_simple_var, NULL);
1958 var = make_ssa_name (var, NULL);
1959 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1960 new_stmt
1961 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1962 mask_op, NULL_TREE);
1963 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1964 mask_op = var;
1965 }
1966
1967 new_stmt
1968 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1969 scale);
1970
1971 if (!useless_type_conversion_p (vectype, rettype))
1972 {
1973 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1974 == TYPE_VECTOR_SUBPARTS (rettype));
1975 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
1976 op = make_ssa_name (var, new_stmt);
1977 gimple_call_set_lhs (new_stmt, op);
1978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1979 var = make_ssa_name (vec_dest, NULL);
1980 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1981 new_stmt
1982 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
1983 NULL_TREE);
1984 }
1985 else
1986 {
1987 var = make_ssa_name (vec_dest, new_stmt);
1988 gimple_call_set_lhs (new_stmt, var);
1989 }
1990
1991 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1992
1993 if (modifier == NARROW)
1994 {
1995 if ((j & 1) == 0)
1996 {
1997 prev_res = var;
1998 continue;
1999 }
2000 var = permute_vec_elements (prev_res, var,
2001 perm_mask, stmt, gsi);
2002 new_stmt = SSA_NAME_DEF_STMT (var);
2003 }
2004
2005 if (prev_stmt_info == NULL)
2006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2007 else
2008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2009 prev_stmt_info = vinfo_for_stmt (new_stmt);
2010 }
2011 return true;
2012 }
2013 else if (is_store)
2014 {
2015 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2016 prev_stmt_info = NULL;
2017 for (i = 0; i < ncopies; i++)
2018 {
2019 unsigned align, misalign;
2020
2021 if (i == 0)
2022 {
2023 tree rhs = gimple_call_arg (stmt, 3);
2024 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2025 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
 2026 /* We should have caught mismatched types earlier. */
2027 gcc_assert (useless_type_conversion_p (vectype,
2028 TREE_TYPE (vec_rhs)));
2029 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2030 NULL_TREE, &dummy, gsi,
2031 &ptr_incr, false, &inv_p);
2032 gcc_assert (!inv_p);
2033 }
2034 else
2035 {
2036 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2037 &def, &dt);
2038 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2039 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2040 &def, &dt);
2041 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2042 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2043 TYPE_SIZE_UNIT (vectype));
2044 }
2045
2046 align = TYPE_ALIGN_UNIT (vectype);
2047 if (aligned_access_p (dr))
2048 misalign = 0;
2049 else if (DR_MISALIGNMENT (dr) == -1)
2050 {
2051 align = TYPE_ALIGN_UNIT (elem_type);
2052 misalign = 0;
2053 }
2054 else
2055 misalign = DR_MISALIGNMENT (dr);
2056 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2057 misalign);
2058 new_stmt
2059 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2060 gimple_call_arg (stmt, 1),
2061 vec_mask, vec_rhs);
2062 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2063 if (i == 0)
2064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2065 else
2066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2067 prev_stmt_info = vinfo_for_stmt (new_stmt);
2068 }
2069 }
2070 else
2071 {
2072 tree vec_mask = NULL_TREE;
2073 prev_stmt_info = NULL;
2074 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2075 for (i = 0; i < ncopies; i++)
2076 {
2077 unsigned align, misalign;
2078
2079 if (i == 0)
2080 {
2081 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2082 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2083 NULL_TREE, &dummy, gsi,
2084 &ptr_incr, false, &inv_p);
2085 gcc_assert (!inv_p);
2086 }
2087 else
2088 {
2089 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2090 &def, &dt);
2091 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2092 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2093 TYPE_SIZE_UNIT (vectype));
2094 }
2095
2096 align = TYPE_ALIGN_UNIT (vectype);
2097 if (aligned_access_p (dr))
2098 misalign = 0;
2099 else if (DR_MISALIGNMENT (dr) == -1)
2100 {
2101 align = TYPE_ALIGN_UNIT (elem_type);
2102 misalign = 0;
2103 }
2104 else
2105 misalign = DR_MISALIGNMENT (dr);
2106 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2107 misalign);
2108 new_stmt
2109 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2110 gimple_call_arg (stmt, 1),
2111 vec_mask);
2112 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114 if (i == 0)
2115 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2116 else
2117 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2118 prev_stmt_info = vinfo_for_stmt (new_stmt);
2119 }
2120 }
2121
2122 return true;
2123}
2124
2125
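/* Example of the transform performed above (illustrative GIMPLE with
   made-up SSA names, ncopies = 1, no gather): the if-converted scalar
   internal call

     x_1 = MASK_LOAD (p_2, align, m_3);
   becomes
     vect_x_4 = MASK_LOAD (vp_5, align, vm_6);

   i.e. one internal call per copy, carrying the data-ref pointer, the
   original alignment argument and the vectorized mask.  */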
ebfd146a
IR
2126/* Function vectorizable_call.
2127
b8698a0f
L
2128 Check if STMT performs a function call that can be vectorized.
2129 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 2130 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2131 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2132
2133static bool
190c2236
JJ
2134vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2135 slp_tree slp_node)
ebfd146a
IR
2136{
2137 tree vec_dest;
2138 tree scalar_dest;
2139 tree op, type;
2140 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2142 tree vectype_out, vectype_in;
2143 int nunits_in;
2144 int nunits_out;
2145 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2146 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b690cc0f 2147 tree fndecl, new_temp, def, rhs_type;
ebfd146a 2148 gimple def_stmt;
0502fb85
UB
2149 enum vect_def_type dt[3]
2150 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
63827fb8 2151 gimple new_stmt = NULL;
ebfd146a 2152 int ncopies, j;
6e1aa848 2153 vec<tree> vargs = vNULL;
ebfd146a
IR
2154 enum { NARROW, NONE, WIDEN } modifier;
2155 size_t i, nargs;
9d5e7640 2156 tree lhs;
ebfd146a 2157
190c2236 2158 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2159 return false;
2160
8644a673 2161 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2162 return false;
2163
ebfd146a
IR
2164 /* Is STMT a vectorizable call? */
2165 if (!is_gimple_call (stmt))
2166 return false;
2167
5ce9450f
JJ
2168 if (gimple_call_internal_p (stmt)
2169 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2170 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2171 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2172 slp_node);
2173
0136f8f0
AH
2174 if (gimple_call_lhs (stmt) == NULL_TREE
2175 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2176 return false;
2177
0136f8f0 2178 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2179
b690cc0f
RG
2180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2181
ebfd146a
IR
2182 /* Process function arguments. */
2183 rhs_type = NULL_TREE;
b690cc0f 2184 vectype_in = NULL_TREE;
ebfd146a
IR
2185 nargs = gimple_call_num_args (stmt);
2186
1b1562a5
MM
 2187 /* Bail out if the function has more than three arguments; we do not have
 2188 interesting builtin functions to vectorize with more than two arguments
 2189 except for fma. A call with no arguments is not vectorizable either. */
2190 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2191 return false;
2192
74bf76ed
JJ
2193 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2194 if (gimple_call_internal_p (stmt)
2195 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2196 {
2197 nargs = 0;
2198 rhs_type = unsigned_type_node;
2199 }
2200
ebfd146a
IR
2201 for (i = 0; i < nargs; i++)
2202 {
b690cc0f
RG
2203 tree opvectype;
2204
ebfd146a
IR
2205 op = gimple_call_arg (stmt, i);
2206
2207 /* We can only handle calls with arguments of the same type. */
2208 if (rhs_type
8533c9d8 2209 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2210 {
73fbfcad 2211 if (dump_enabled_p ())
78c60e3d 2212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2213 "argument types differ.\n");
ebfd146a
IR
2214 return false;
2215 }
b690cc0f
RG
2216 if (!rhs_type)
2217 rhs_type = TREE_TYPE (op);
ebfd146a 2218
24ee1384 2219 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b690cc0f 2220 &def_stmt, &def, &dt[i], &opvectype))
ebfd146a 2221 {
73fbfcad 2222 if (dump_enabled_p ())
78c60e3d 2223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2224 "use not simple.\n");
ebfd146a
IR
2225 return false;
2226 }
ebfd146a 2227
b690cc0f
RG
2228 if (!vectype_in)
2229 vectype_in = opvectype;
2230 else if (opvectype
2231 && opvectype != vectype_in)
2232 {
73fbfcad 2233 if (dump_enabled_p ())
78c60e3d 2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2235 "argument vector types differ.\n");
b690cc0f
RG
2236 return false;
2237 }
2238 }
2239 /* If all arguments are external or constant defs use a vector type with
2240 the same size as the output vector type. */
ebfd146a 2241 if (!vectype_in)
b690cc0f 2242 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2243 if (vec_stmt)
2244 gcc_assert (vectype_in);
2245 if (!vectype_in)
2246 {
73fbfcad 2247 if (dump_enabled_p ())
7d8930a0 2248 {
78c60e3d
SS
2249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2250 "no vectype for scalar type ");
2251 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2252 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2253 }
2254
2255 return false;
2256 }
ebfd146a
IR
2257
2258 /* FORNOW */
b690cc0f
RG
2259 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2260 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
2261 if (nunits_in == nunits_out / 2)
2262 modifier = NARROW;
2263 else if (nunits_out == nunits_in)
2264 modifier = NONE;
2265 else if (nunits_out == nunits_in / 2)
2266 modifier = WIDEN;
2267 else
2268 return false;
2269
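/* Classification example (derived from the checks above): a call taking
   V4SI arguments and producing V8HI has nunits_in = 4, nunits_out = 8,
   hence NARROW -- each vectorized call consumes two input vectors per
   output vector, and ncopies is computed against nunits_out below.  */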
2270 /* For now, we only vectorize functions if a target specific builtin
2271 is available. TODO -- in some cases, it might be profitable to
2272 insert the calls for pieces of the vector, in order to be able
2273 to vectorize other operations in the loop. */
2274 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2275 if (fndecl == NULL_TREE)
2276 {
74bf76ed
JJ
2277 if (gimple_call_internal_p (stmt)
2278 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2279 && !slp_node
2280 && loop_vinfo
2281 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2282 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2283 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2284 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2285 {
2286 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2287 { 0, 1, 2, ... vf - 1 } vector. */
2288 gcc_assert (nargs == 0);
2289 }
2290 else
2291 {
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2294 "function is not vectorizable.\n");
74bf76ed
JJ
2295 return false;
2296 }
ebfd146a
IR
2297 }
2298
5006671f 2299 gcc_assert (!gimple_vuse (stmt));
ebfd146a 2300
190c2236
JJ
2301 if (slp_node || PURE_SLP_STMT (stmt_info))
2302 ncopies = 1;
2303 else if (modifier == NARROW)
ebfd146a
IR
2304 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2305 else
2306 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2307
2308 /* Sanity check: make sure that at least one copy of the vectorized stmt
2309 needs to be generated. */
2310 gcc_assert (ncopies >= 1);
2311
2312 if (!vec_stmt) /* transformation not required. */
2313 {
2314 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2315 if (dump_enabled_p ())
e645e942
TJ
2316 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2317 "\n");
c3e7ee41 2318 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
2319 return true;
2320 }
2321
2322 /** Transform. **/
2323
73fbfcad 2324 if (dump_enabled_p ())
e645e942 2325 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2326
2327 /* Handle def. */
2328 scalar_dest = gimple_call_lhs (stmt);
2329 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2330
2331 prev_stmt_info = NULL;
2332 switch (modifier)
2333 {
2334 case NONE:
2335 for (j = 0; j < ncopies; ++j)
2336 {
2337 /* Build argument list for the vectorized call. */
2338 if (j == 0)
9771b263 2339 vargs.create (nargs);
ebfd146a 2340 else
9771b263 2341 vargs.truncate (0);
ebfd146a 2342
190c2236
JJ
2343 if (slp_node)
2344 {
ef062b13 2345 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2346 vec<tree> vec_oprnds0;
190c2236
JJ
2347
2348 for (i = 0; i < nargs; i++)
9771b263 2349 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2350 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2351 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2352
2353 /* Arguments are ready. Create the new vector stmt. */
9771b263 2354 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2355 {
2356 size_t k;
2357 for (k = 0; k < nargs; k++)
2358 {
37b5ec8f 2359 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2360 vargs[k] = vec_oprndsk[i];
190c2236
JJ
2361 }
2362 new_stmt = gimple_build_call_vec (fndecl, vargs);
2363 new_temp = make_ssa_name (vec_dest, new_stmt);
2364 gimple_call_set_lhs (new_stmt, new_temp);
2365 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2366 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2367 }
2368
2369 for (i = 0; i < nargs; i++)
2370 {
37b5ec8f 2371 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2372 vec_oprndsi.release ();
190c2236 2373 }
190c2236
JJ
2374 continue;
2375 }
2376
ebfd146a
IR
2377 for (i = 0; i < nargs; i++)
2378 {
2379 op = gimple_call_arg (stmt, i);
2380 if (j == 0)
2381 vec_oprnd0
2382 = vect_get_vec_def_for_operand (op, stmt, NULL);
2383 else
63827fb8
IR
2384 {
2385 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2386 vec_oprnd0
2387 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2388 }
ebfd146a 2389
9771b263 2390 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2391 }
2392
74bf76ed
JJ
2393 if (gimple_call_internal_p (stmt)
2394 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2395 {
2396 tree *v = XALLOCAVEC (tree, nunits_out);
2397 int k;
2398 for (k = 0; k < nunits_out; ++k)
2399 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2400 tree cst = build_vector (vectype_out, v);
2401 tree new_var
2402 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2403 gimple init_stmt = gimple_build_assign (new_var, cst);
2404 new_temp = make_ssa_name (new_var, init_stmt);
2405 gimple_assign_set_lhs (init_stmt, new_temp);
2406 vect_init_vector_1 (stmt, init_stmt, NULL);
2407 new_temp = make_ssa_name (vec_dest, NULL);
2408 new_stmt = gimple_build_assign (new_temp,
2409 gimple_assign_lhs (init_stmt));
2410 }
2411 else
2412 {
2413 new_stmt = gimple_build_call_vec (fndecl, vargs);
2414 new_temp = make_ssa_name (vec_dest, new_stmt);
2415 gimple_call_set_lhs (new_stmt, new_temp);
2416 }
ebfd146a
IR
2417 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2418
2419 if (j == 0)
2420 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2421 else
2422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2423
2424 prev_stmt_info = vinfo_for_stmt (new_stmt);
2425 }
2426
2427 break;
2428
2429 case NARROW:
2430 for (j = 0; j < ncopies; ++j)
2431 {
2432 /* Build argument list for the vectorized call. */
2433 if (j == 0)
9771b263 2434 vargs.create (nargs * 2);
ebfd146a 2435 else
9771b263 2436 vargs.truncate (0);
ebfd146a 2437
190c2236
JJ
2438 if (slp_node)
2439 {
ef062b13 2440 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2441 vec<tree> vec_oprnds0;
190c2236
JJ
2442
2443 for (i = 0; i < nargs; i++)
9771b263 2444 vargs.quick_push (gimple_call_arg (stmt, i));
190c2236 2445 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
37b5ec8f 2446 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2447
2448 /* Arguments are ready. Create the new vector stmt. */
9771b263 2449 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
2450 {
2451 size_t k;
9771b263 2452 vargs.truncate (0);
190c2236
JJ
2453 for (k = 0; k < nargs; k++)
2454 {
37b5ec8f 2455 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
2456 vargs.quick_push (vec_oprndsk[i]);
2457 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236
JJ
2458 }
2459 new_stmt = gimple_build_call_vec (fndecl, vargs);
2460 new_temp = make_ssa_name (vec_dest, new_stmt);
2461 gimple_call_set_lhs (new_stmt, new_temp);
2462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2463 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2464 }
2465
2466 for (i = 0; i < nargs; i++)
2467 {
37b5ec8f 2468 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2469 vec_oprndsi.release ();
190c2236 2470 }
190c2236
JJ
2471 continue;
2472 }
2473
ebfd146a
IR
2474 for (i = 0; i < nargs; i++)
2475 {
2476 op = gimple_call_arg (stmt, i);
2477 if (j == 0)
2478 {
2479 vec_oprnd0
2480 = vect_get_vec_def_for_operand (op, stmt, NULL);
2481 vec_oprnd1
63827fb8 2482 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2483 }
2484 else
2485 {
336ecb65 2486 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 2487 vec_oprnd0
63827fb8 2488 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 2489 vec_oprnd1
63827fb8 2490 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
2491 }
2492
9771b263
DN
2493 vargs.quick_push (vec_oprnd0);
2494 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
2495 }
2496
2497 new_stmt = gimple_build_call_vec (fndecl, vargs);
2498 new_temp = make_ssa_name (vec_dest, new_stmt);
2499 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
2500 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2501
2502 if (j == 0)
2503 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2504 else
2505 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2506
2507 prev_stmt_info = vinfo_for_stmt (new_stmt);
2508 }
2509
2510 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2511
2512 break;
2513
2514 case WIDEN:
2515 /* No current target implements this case. */
2516 return false;
2517 }
2518
9771b263 2519 vargs.release ();
ebfd146a 2520
ebfd146a
IR
 2521 /* The call in STMT might prevent it from being removed in DCE.
 2522 We however cannot remove it here, due to the way the SSA name
 2523 it defines is mapped to the new definition. So just replace the
 2524 rhs of the statement with something harmless. */
2525
dd34c087
JJ
2526 if (slp_node)
2527 return true;
2528
ebfd146a 2529 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
2530 if (is_pattern_stmt_p (stmt_info))
2531 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2532 else
2533 lhs = gimple_call_lhs (stmt);
2534 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 2535 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 2536 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
2537 STMT_VINFO_STMT (stmt_info) = new_stmt;
2538 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
2539
2540 return true;
2541}
2542
2543
0136f8f0
AH
2544struct simd_call_arg_info
2545{
2546 tree vectype;
2547 tree op;
2548 enum vect_def_type dt;
2549 HOST_WIDE_INT linear_step;
2550 unsigned int align;
2551};
2552
2553/* Function vectorizable_simd_clone_call.
2554
2555 Check if STMT performs a function call that can be vectorized
2556 by calling a simd clone of the function.
2557 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 2558 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2559 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2560
2561static bool
2562vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2563 gimple *vec_stmt, slp_tree slp_node)
2564{
2565 tree vec_dest;
2566 tree scalar_dest;
2567 tree op, type;
2568 tree vec_oprnd0 = NULL_TREE;
2569 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2570 tree vectype;
2571 unsigned int nunits;
2572 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2573 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2574 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2575 tree fndecl, new_temp, def;
2576 gimple def_stmt;
2577 gimple new_stmt = NULL;
2578 int ncopies, j;
2579 vec<simd_call_arg_info> arginfo = vNULL;
2580 vec<tree> vargs = vNULL;
2581 size_t i, nargs;
2582 tree lhs, rtype, ratype;
2583 vec<constructor_elt, va_gc> *ret_ctor_elts;
2584
2585 /* Is STMT a vectorizable call? */
2586 if (!is_gimple_call (stmt))
2587 return false;
2588
2589 fndecl = gimple_call_fndecl (stmt);
2590 if (fndecl == NULL_TREE)
2591 return false;
2592
2593 struct cgraph_node *node = cgraph_get_node (fndecl);
2594 if (node == NULL || node->simd_clones == NULL)
2595 return false;
2596
2597 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2598 return false;
2599
2600 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2601 return false;
2602
2603 if (gimple_call_lhs (stmt)
2604 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2605 return false;
2606
2607 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2608
2609 vectype = STMT_VINFO_VECTYPE (stmt_info);
2610
2611 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2612 return false;
2613
2614 /* FORNOW */
2615 if (slp_node || PURE_SLP_STMT (stmt_info))
2616 return false;
2617
2618 /* Process function arguments. */
2619 nargs = gimple_call_num_args (stmt);
2620
2621 /* Bail out if the function has zero arguments. */
2622 if (nargs == 0)
2623 return false;
2624
2625 arginfo.create (nargs);
2626
2627 for (i = 0; i < nargs; i++)
2628 {
2629 simd_call_arg_info thisarginfo;
2630 affine_iv iv;
2631
2632 thisarginfo.linear_step = 0;
2633 thisarginfo.align = 0;
2634 thisarginfo.op = NULL_TREE;
2635
2636 op = gimple_call_arg (stmt, i);
2637 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2638 &def_stmt, &def, &thisarginfo.dt,
2639 &thisarginfo.vectype)
2640 || thisarginfo.dt == vect_uninitialized_def)
2641 {
2642 if (dump_enabled_p ())
2643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2644 "use not simple.\n");
2645 arginfo.release ();
2646 return false;
2647 }
2648
2649 if (thisarginfo.dt == vect_constant_def
2650 || thisarginfo.dt == vect_external_def)
2651 gcc_assert (thisarginfo.vectype == NULL_TREE);
2652 else
2653 gcc_assert (thisarginfo.vectype != NULL_TREE);
2654
2655 if (thisarginfo.dt != vect_constant_def
2656 && thisarginfo.dt != vect_external_def
2657 && loop_vinfo
2658 && TREE_CODE (op) == SSA_NAME
2659 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2660 && tree_fits_shwi_p (iv.step))
2661 {
2662 thisarginfo.linear_step = tree_to_shwi (iv.step);
2663 thisarginfo.op = iv.base;
2664 }
2665 else if ((thisarginfo.dt == vect_constant_def
2666 || thisarginfo.dt == vect_external_def)
2667 && POINTER_TYPE_P (TREE_TYPE (op)))
2668 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2669
2670 arginfo.quick_push (thisarginfo);
2671 }
2672
2673 unsigned int badness = 0;
2674 struct cgraph_node *bestn = NULL;
2675 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2676 bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2677 else
2678 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2679 n = n->simdclone->next_clone)
2680 {
2681 unsigned int this_badness = 0;
2682 if (n->simdclone->simdlen
2683 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2684 || n->simdclone->nargs != nargs)
2685 continue;
2686 if (n->simdclone->simdlen
2687 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2688 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2689 - exact_log2 (n->simdclone->simdlen)) * 1024;
2690 if (n->simdclone->inbranch)
2691 this_badness += 2048;
2692 int target_badness = targetm.simd_clone.usable (n);
2693 if (target_badness < 0)
2694 continue;
2695 this_badness += target_badness * 512;
2696 /* FORNOW: Have to add code to add the mask argument. */
2697 if (n->simdclone->inbranch)
2698 continue;
2699 for (i = 0; i < nargs; i++)
2700 {
2701 switch (n->simdclone->args[i].arg_type)
2702 {
2703 case SIMD_CLONE_ARG_TYPE_VECTOR:
2704 if (!useless_type_conversion_p
2705 (n->simdclone->args[i].orig_type,
2706 TREE_TYPE (gimple_call_arg (stmt, i))))
2707 i = -1;
2708 else if (arginfo[i].dt == vect_constant_def
2709 || arginfo[i].dt == vect_external_def
2710 || arginfo[i].linear_step)
2711 this_badness += 64;
2712 break;
2713 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2714 if (arginfo[i].dt != vect_constant_def
2715 && arginfo[i].dt != vect_external_def)
2716 i = -1;
2717 break;
2718 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2719 if (arginfo[i].dt == vect_constant_def
2720 || arginfo[i].dt == vect_external_def
2721 || (arginfo[i].linear_step
2722 != n->simdclone->args[i].linear_step))
2723 i = -1;
2724 break;
2725 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2726 /* FORNOW */
2727 i = -1;
2728 break;
2729 case SIMD_CLONE_ARG_TYPE_MASK:
2730 gcc_unreachable ();
2731 }
2732 if (i == (size_t) -1)
2733 break;
2734 if (n->simdclone->args[i].alignment > arginfo[i].align)
2735 {
2736 i = -1;
2737 break;
2738 }
2739 if (arginfo[i].align)
2740 this_badness += (exact_log2 (arginfo[i].align)
2741 - exact_log2 (n->simdclone->args[i].alignment));
2742 }
2743 if (i == (size_t) -1)
2744 continue;
2745 if (bestn == NULL || this_badness < badness)
2746 {
2747 bestn = n;
2748 badness = this_badness;
2749 }
2750 }
2751
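/* Worked example of the scoring above (assumed numbers): with
   LOOP_VINFO_VECT_FACTOR = 8, a usable not-inbranch clone with
   simdlen = 4 starts at (log2 (8) - log2 (4)) * 1024 = 1024, while an
   otherwise identical simdlen = 8 clone scores 0 and is preferred.  */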
2752 if (bestn == NULL)
2753 {
2754 arginfo.release ();
2755 return false;
2756 }
2757
2758 for (i = 0; i < nargs; i++)
2759 if ((arginfo[i].dt == vect_constant_def
2760 || arginfo[i].dt == vect_external_def)
2761 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2762 {
2763 arginfo[i].vectype
2764 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2765 i)));
2766 if (arginfo[i].vectype == NULL
2767 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2768 > bestn->simdclone->simdlen))
2769 {
2770 arginfo.release ();
2771 return false;
2772 }
2773 }
2774
2775 fndecl = bestn->decl;
2776 nunits = bestn->simdclone->simdlen;
2777 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2778
 2779 /* If the function isn't const, only allow it in simd loops where the user
2780 has asserted that at least nunits consecutive iterations can be
2781 performed using SIMD instructions. */
2782 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2783 && gimple_vuse (stmt))
2784 {
2785 arginfo.release ();
2786 return false;
2787 }
2788
2789 /* Sanity check: make sure that at least one copy of the vectorized stmt
2790 needs to be generated. */
2791 gcc_assert (ncopies >= 1);
2792
2793 if (!vec_stmt) /* transformation not required. */
2794 {
2795 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2796 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2797 if (dump_enabled_p ())
2798 dump_printf_loc (MSG_NOTE, vect_location,
2799 "=== vectorizable_simd_clone_call ===\n");
2800/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2801 arginfo.release ();
2802 return true;
2803 }
2804
2805 /** Transform. **/
2806
2807 if (dump_enabled_p ())
2808 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2809
2810 /* Handle def. */
2811 scalar_dest = gimple_call_lhs (stmt);
2812 vec_dest = NULL_TREE;
2813 rtype = NULL_TREE;
2814 ratype = NULL_TREE;
2815 if (scalar_dest)
2816 {
2817 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2818 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2819 if (TREE_CODE (rtype) == ARRAY_TYPE)
2820 {
2821 ratype = rtype;
2822 rtype = TREE_TYPE (ratype);
2823 }
2824 }
2825
2826 prev_stmt_info = NULL;
2827 for (j = 0; j < ncopies; ++j)
2828 {
2829 /* Build argument list for the vectorized call. */
2830 if (j == 0)
2831 vargs.create (nargs);
2832 else
2833 vargs.truncate (0);
2834
2835 for (i = 0; i < nargs; i++)
2836 {
2837 unsigned int k, l, m, o;
2838 tree atype;
2839 op = gimple_call_arg (stmt, i);
2840 switch (bestn->simdclone->args[i].arg_type)
2841 {
2842 case SIMD_CLONE_ARG_TYPE_VECTOR:
2843 atype = bestn->simdclone->args[i].vector_type;
2844 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2845 for (m = j * o; m < (j + 1) * o; m++)
2846 {
2847 if (TYPE_VECTOR_SUBPARTS (atype)
2848 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2849 {
2850 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2851 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2852 / TYPE_VECTOR_SUBPARTS (atype));
2853 gcc_assert ((k & (k - 1)) == 0);
2854 if (m == 0)
2855 vec_oprnd0
2856 = vect_get_vec_def_for_operand (op, stmt, NULL);
2857 else
2858 {
2859 vec_oprnd0 = arginfo[i].op;
2860 if ((m & (k - 1)) == 0)
2861 vec_oprnd0
2862 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2863 vec_oprnd0);
2864 }
2865 arginfo[i].op = vec_oprnd0;
2866 vec_oprnd0
2867 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2868 size_int (prec),
2869 bitsize_int ((m & (k - 1)) * prec));
2870 new_stmt
2871 = gimple_build_assign (make_ssa_name (atype, NULL),
2872 vec_oprnd0);
2873 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2874 vargs.safe_push (gimple_assign_lhs (new_stmt));
2875 }
2876 else
2877 {
2878 k = (TYPE_VECTOR_SUBPARTS (atype)
2879 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2880 gcc_assert ((k & (k - 1)) == 0);
2881 vec<constructor_elt, va_gc> *ctor_elts;
2882 if (k != 1)
2883 vec_alloc (ctor_elts, k);
2884 else
2885 ctor_elts = NULL;
2886 for (l = 0; l < k; l++)
2887 {
2888 if (m == 0 && l == 0)
2889 vec_oprnd0
2890 = vect_get_vec_def_for_operand (op, stmt, NULL);
2891 else
2892 vec_oprnd0
2893 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2894 arginfo[i].op);
2895 arginfo[i].op = vec_oprnd0;
2896 if (k == 1)
2897 break;
2898 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2899 vec_oprnd0);
2900 }
2901 if (k == 1)
2902 vargs.safe_push (vec_oprnd0);
2903 else
2904 {
2905 vec_oprnd0 = build_constructor (atype, ctor_elts);
2906 new_stmt
2907 = gimple_build_assign (make_ssa_name (atype, NULL),
2908 vec_oprnd0);
2909 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2910 vargs.safe_push (gimple_assign_lhs (new_stmt));
2911 }
2912 }
2913 }
2914 break;
2915 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2916 vargs.safe_push (op);
2917 break;
2918 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2919 if (j == 0)
2920 {
2921 gimple_seq stmts;
2922 arginfo[i].op
2923 = force_gimple_operand (arginfo[i].op, &stmts, true,
2924 NULL_TREE);
2925 if (stmts != NULL)
2926 {
2927 basic_block new_bb;
2928 edge pe = loop_preheader_edge (loop);
2929 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2930 gcc_assert (!new_bb);
2931 }
2932 tree phi_res = copy_ssa_name (op, NULL);
2933 gimple new_phi = create_phi_node (phi_res, loop->header);
2934 set_vinfo_for_stmt (new_phi,
2935 new_stmt_vec_info (new_phi, loop_vinfo,
2936 NULL));
2937 add_phi_arg (new_phi, arginfo[i].op,
2938 loop_preheader_edge (loop), UNKNOWN_LOCATION);
2939 enum tree_code code
2940 = POINTER_TYPE_P (TREE_TYPE (op))
2941 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2942 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2943 ? sizetype : TREE_TYPE (op);
2944 double_int cst
2945 = double_int::from_shwi
2946 (bestn->simdclone->args[i].linear_step);
2947 cst *= double_int::from_uhwi (ncopies * nunits);
2948 tree tcst = double_int_to_tree (type, cst);
2949 tree phi_arg = copy_ssa_name (op, NULL);
2950 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
2951 phi_res, tcst);
2952 gimple_stmt_iterator si = gsi_after_labels (loop->header);
2953 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
2954 set_vinfo_for_stmt (new_stmt,
2955 new_stmt_vec_info (new_stmt, loop_vinfo,
2956 NULL));
2957 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
2958 UNKNOWN_LOCATION);
2959 arginfo[i].op = phi_res;
2960 vargs.safe_push (phi_res);
2961 }
2962 else
2963 {
2964 enum tree_code code
2965 = POINTER_TYPE_P (TREE_TYPE (op))
2966 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2967 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2968 ? sizetype : TREE_TYPE (op);
2969 double_int cst
2970 = double_int::from_shwi
2971 (bestn->simdclone->args[i].linear_step);
2972 cst *= double_int::from_uhwi (j * nunits);
2973 tree tcst = double_int_to_tree (type, cst);
2974 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
2975 new_stmt
2976 = gimple_build_assign_with_ops (code, new_temp,
2977 arginfo[i].op, tcst);
2978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2979 vargs.safe_push (new_temp);
2980 }
2981 break;
2982 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2983 default:
2984 gcc_unreachable ();
2985 }
2986 }
2987
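/* Example for the LINEAR_CONSTANT_STEP case above (assumed values):
   linear_step = 4, ncopies = 2, nunits = 8.  The first copy builds a
   loop-carried IV advancing by 4 * 2 * 8 = 64 per iteration:
     s_1 = PHI <s_0 (preheader), s_2 (latch)>
     s_2 = s_1 + 64;
   copy 0 passes s_1 to the clone; copy 1 passes s_1 + 4 * 8 = s_1 + 32.  */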
2988 new_stmt = gimple_build_call_vec (fndecl, vargs);
2989 if (vec_dest)
2990 {
2991 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
2992 if (ratype)
2993 new_temp = create_tmp_var (ratype, NULL);
2994 else if (TYPE_VECTOR_SUBPARTS (vectype)
2995 == TYPE_VECTOR_SUBPARTS (rtype))
2996 new_temp = make_ssa_name (vec_dest, new_stmt);
2997 else
2998 new_temp = make_ssa_name (rtype, new_stmt);
2999 gimple_call_set_lhs (new_stmt, new_temp);
3000 }
3001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3002
3003 if (vec_dest)
3004 {
3005 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3006 {
3007 unsigned int k, l;
3008 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3009 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3010 gcc_assert ((k & (k - 1)) == 0);
3011 for (l = 0; l < k; l++)
3012 {
3013 tree t;
3014 if (ratype)
3015 {
3016 t = build_fold_addr_expr (new_temp);
3017 t = build2 (MEM_REF, vectype, t,
3018 build_int_cst (TREE_TYPE (t),
3019 l * prec / BITS_PER_UNIT));
3020 }
3021 else
3022 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3023 size_int (prec), bitsize_int (l * prec));
3024 new_stmt
3025 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3026 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3027 if (j == 0 && l == 0)
3028 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3029 else
3030 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3031
3032 prev_stmt_info = vinfo_for_stmt (new_stmt);
3033 }
3034
3035 if (ratype)
3036 {
3037 tree clobber = build_constructor (ratype, NULL);
3038 TREE_THIS_VOLATILE (clobber) = 1;
3039 new_stmt = gimple_build_assign (new_temp, clobber);
3040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3041 }
3042 continue;
3043 }
3044 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3045 {
3046 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3047 / TYPE_VECTOR_SUBPARTS (rtype));
3048 gcc_assert ((k & (k - 1)) == 0);
3049 if ((j & (k - 1)) == 0)
3050 vec_alloc (ret_ctor_elts, k);
3051 if (ratype)
3052 {
3053 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3054 for (m = 0; m < o; m++)
3055 {
3056 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3057 size_int (m), NULL_TREE, NULL_TREE);
3058 new_stmt
3059 = gimple_build_assign (make_ssa_name (rtype, NULL),
3060 tem);
3061 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3062 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3063 gimple_assign_lhs (new_stmt));
3064 }
3065 tree clobber = build_constructor (ratype, NULL);
3066 TREE_THIS_VOLATILE (clobber) = 1;
3067 new_stmt = gimple_build_assign (new_temp, clobber);
3068 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3069 }
3070 else
3071 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3072 if ((j & (k - 1)) != k - 1)
3073 continue;
3074 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3075 new_stmt
3076 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3077 vec_oprnd0);
3078 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3079
3080 if ((unsigned) j == k - 1)
3081 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3082 else
3083 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3084
3085 prev_stmt_info = vinfo_for_stmt (new_stmt);
3086 continue;
3087 }
3088 else if (ratype)
3089 {
3090 tree t = build_fold_addr_expr (new_temp);
3091 t = build2 (MEM_REF, vectype, t,
3092 build_int_cst (TREE_TYPE (t), 0));
3093 new_stmt
3094 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3095 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3096 tree clobber = build_constructor (ratype, NULL);
3097 TREE_THIS_VOLATILE (clobber) = 1;
3098 vect_finish_stmt_generation (stmt,
3099 gimple_build_assign (new_temp,
3100 clobber), gsi);
3101 }
3102 }
3103
3104 if (j == 0)
3105 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3106 else
3107 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3108
3109 prev_stmt_info = vinfo_for_stmt (new_stmt);
3110 }
3111
3112 vargs.release ();
3113
 3114 /* The call in STMT might prevent it from being removed in DCE.
 3115 We however cannot remove it here, due to the way the SSA name
 3116 it defines is mapped to the new definition. So just replace the
 3117 rhs of the statement with something harmless. */
3118
3119 if (slp_node)
3120 return true;
3121
3122 if (scalar_dest)
3123 {
3124 type = TREE_TYPE (scalar_dest);
3125 if (is_pattern_stmt_p (stmt_info))
3126 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3127 else
3128 lhs = gimple_call_lhs (stmt);
3129 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3130 }
3131 else
3132 new_stmt = gimple_build_nop ();
3133 set_vinfo_for_stmt (new_stmt, stmt_info);
3134 set_vinfo_for_stmt (stmt, NULL);
3135 STMT_VINFO_STMT (stmt_info) = new_stmt;
3136 gsi_replace (gsi, new_stmt, false);
3137 unlink_stmt_vdef (stmt);
3138
3139 return true;
3140}
3141
3142
ebfd146a
IR
3143/* Function vect_gen_widened_results_half
3144
 3145 Create a vector stmt whose code, number of arguments, and result
b8698a0f 3146 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
ff802fa1 3147 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
ebfd146a
IR
3148 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3149 needs to be created (DECL is a function-decl of a target-builtin).
3150 STMT is the original scalar stmt that we are vectorizing. */
3151
3152static gimple
3153vect_gen_widened_results_half (enum tree_code code,
3154 tree decl,
3155 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3156 tree vec_dest, gimple_stmt_iterator *gsi,
3157 gimple stmt)
b8698a0f 3158{
ebfd146a 3159 gimple new_stmt;
b8698a0f
L
3160 tree new_temp;
3161
3162 /* Generate half of the widened result: */
3163 if (code == CALL_EXPR)
3164 {
3165 /* Target specific support */
ebfd146a
IR
3166 if (op_type == binary_op)
3167 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3168 else
3169 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3170 new_temp = make_ssa_name (vec_dest, new_stmt);
3171 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3172 }
3173 else
ebfd146a 3174 {
b8698a0f
L
3175 /* Generic support */
3176 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3177 if (op_type != binary_op)
3178 vec_oprnd1 = NULL;
3179 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3180 vec_oprnd1);
3181 new_temp = make_ssa_name (vec_dest, new_stmt);
3182 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3183 }
ebfd146a
IR
3184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3185
ebfd146a
IR
3186 return new_stmt;
3187}
3188
4a00c761
JJ
3189
3190/* Get vectorized definitions for loop-based vectorization. For the first
 3191 operand we call vect_get_vec_def_for_operand() (with OPRND containing
 3192 the scalar operand), and for the rest we get a copy with
3193 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3194 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3195 The vectors are collected into VEC_OPRNDS. */
3196
3197static void
3198vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
9771b263 3199 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3200{
3201 tree vec_oprnd;
3202
 3203 /* Get the first vector operand. */
 3204 /* All the vector operands except the very first one (that is the scalar
 3205 operand) are stmt copies. */
3206 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3207 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3208 else
3209 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3210
9771b263 3211 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3212
3213 /* Get second vector operand. */
3214 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3215 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3216
3217 *oprnd = vec_oprnd;
3218
3219 /* For conversion in multiple steps, continue to get operands
3220 recursively. */
3221 if (multi_step_cvt)
3222 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3223}
3224
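/* Count check (follows from the recursion above): each invocation pushes
   two vector defs and recurses MULTI_STEP_CVT more times, so a call with
   multi_step_cvt = 1 collects the four source vectors that a two-step
   narrowing (e.g. int -> short -> char) consumes per result vector.  */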
3225
3226/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3227 For multi-step conversions store the resulting vectors and call the function
3228 recursively. */
3229
3230static void
9771b263 3231vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4a00c761 3232 int multi_step_cvt, gimple stmt,
9771b263 3233 vec<tree> vec_dsts,
4a00c761
JJ
3234 gimple_stmt_iterator *gsi,
3235 slp_tree slp_node, enum tree_code code,
3236 stmt_vec_info *prev_stmt_info)
3237{
3238 unsigned int i;
3239 tree vop0, vop1, new_tmp, vec_dest;
3240 gimple new_stmt;
3241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3242
9771b263 3243 vec_dest = vec_dsts.pop ();
4a00c761 3244
9771b263 3245 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3246 {
3247 /* Create demotion operation. */
9771b263
DN
3248 vop0 = (*vec_oprnds)[i];
3249 vop1 = (*vec_oprnds)[i + 1];
4a00c761
JJ
3250 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3251 new_tmp = make_ssa_name (vec_dest, new_stmt);
3252 gimple_assign_set_lhs (new_stmt, new_tmp);
3253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3254
3255 if (multi_step_cvt)
3256 /* Store the resulting vector for next recursive call. */
9771b263 3257 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3258 else
3259 {
3260 /* This is the last step of the conversion sequence. Store the
3261 vectors in SLP_NODE or in vector info of the scalar statement
3262 (or in STMT_VINFO_RELATED_STMT chain). */
3263 if (slp_node)
9771b263 3264 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761
JJ
3265 else
3266 {
3267 if (!*prev_stmt_info)
3268 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3269 else
3270 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3271
3272 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3273 }
3274 }
3275 }
3276
3277 /* For multi-step demotion operations we first generate demotion operations
3278 from the source type to the intermediate types, and then combine the
3279 results (stored in VEC_OPRNDS) in demotion operation to the destination
3280 type. */
3281 if (multi_step_cvt)
3282 {
3283 /* At each level of recursion we have half of the operands we had at the
3284 previous level. */
9771b263 3285 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
3286 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3287 stmt, vec_dsts, gsi, slp_node,
3288 VEC_PACK_TRUNC_EXPR,
3289 prev_stmt_info);
3290 }
3291
9771b263 3292 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
3293}
3294
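/* Example (derived from the code above): demoting four V4SI operands to
   one V16QI result takes two levels of recursion:
     tmp_0 = VEC_PACK_TRUNC_EXPR <v_0, v_1>;     level 1: 4 -> 2 (V8HI)
     tmp_1 = VEC_PACK_TRUNC_EXPR <v_2, v_3>;
     res   = VEC_PACK_TRUNC_EXPR <tmp_0, tmp_1>; level 2: 2 -> 1 (V16QI)  */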
3295
3296/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3297 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3298 the resulting vectors and call the function recursively. */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of the promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;
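  /* For example, on a target with 128-bit vectors, an int -> char
     conversion has vectype_in == V4SI (nunits_in == 4) and vectype_out
     == V16QI (nunits_out == 16), hence NARROW; char -> int is the
     corresponding WIDEN.  */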

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
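  /* As an illustration of the WIDEN case above: a FLOAT_EXPR from SImode
     ints to DFmode doubles can be handled by first widening V4SI to V2DI
     (a NOP_EXPR widening) and then converting V2DI to V2DF, which the loop
     over successively wider integer modes discovers.  */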

  if (!vec_stmt)                /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
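  /* For example (an illustration): widening chars to ints through shorts
     pushes the V4SI destination first and the intermediate V8HI destination
     after it, so the promotion loop below, which walks VEC_DSTS from the
     highest index down, uses the narrowest destination first.  */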
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt; i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                                         slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt; i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;
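  /* (E.g. a V4SF <-> V4SI VIEW_CONVERT_EXPR passes this check: the number
     of subparts and the vector mode size are both unchanged.)  */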

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
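/* (Illustrative: on x86, SSE2 already provides vector-by-scalar shifts,
   while per-element vector-by-vector shifts only appear with AVX2, so the
   two optab queries below can give different answers.)  */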

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}


/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
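/* (Illustrative note: for 'x << n' with a loop-invariant n the shift
   amount remains a single scalar usable by vector-by-scalar patterns,
   whereas for 'x << y[i]' every element has its own amount and the
   vector/vector form is required.)  */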

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same for
         all the stmts; in loops, if it is a constant or invariant, it is
         always a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.\n");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.\n");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.\n");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.\n");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kinds of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary and ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt; i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}


/* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   return NULL.  */
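/* (For example, for V4SI the selector built below is {3, 2, 1, 0}.)  */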

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  bool negative = false;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases we can vectorize even if the data-type is not
     supported (e.g. array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative =
    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                          ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                          size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.");
      return false;
    }

  if (negative)
    {
      gcc_assert (!grouped_store);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
          && alignment_support_scheme != dr_unaligned_supported)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step but alignment required.");
          return false;
        }
      if (!perm_mask_for_reverse (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "negative step and reversing not supported.");
          return false;
        }
    }
5032
0d0293ac 5033 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5034 {
0d0293ac 5035 grouped_store = true;
e14c1050 5036 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
b602d918
RS
5037 if (!slp && !PURE_SLP_STMT (stmt_info))
5038 {
e14c1050 5039 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
272c6793
RS
5040 if (vect_store_lanes_supported (vectype, group_size))
5041 store_lanes_p = true;
0d0293ac 5042 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
5043 return false;
5044 }
b8698a0f 5045
ebfd146a
IR
5046 if (first_stmt == stmt)
5047 {
5048 /* STMT is the leader of the group. Check the operands of all the
5049 stmts of the group. */
e14c1050 5050 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
5051 while (next_stmt)
5052 {
5053 gcc_assert (gimple_assign_single_p (next_stmt));
5054 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
5055 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5056 &def_stmt, &def, &dt))
ebfd146a 5057 {
73fbfcad 5058 if (dump_enabled_p ())
78c60e3d 5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5060 "use not simple.\n");
ebfd146a
IR
5061 return false;
5062 }
e14c1050 5063 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5064 }
5065 }
5066 }
5067
5068 if (!vec_stmt) /* transformation not required. */
5069 {
5070 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
92345349
BS
5071 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5072 NULL, NULL, NULL);
ebfd146a
IR
5073 return true;
5074 }
5075
5076 /** Transform. **/
5077
c716e67f
XDL
5078 ensure_base_align (stmt_info, dr);
5079
0d0293ac 5080 if (grouped_store)
ebfd146a
IR
5081 {
5082 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5083 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5084
e14c1050 5085 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5086
5087 /* FORNOW */
a70d6342 5088 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5089
5090 /* We vectorize all the stmts of the interleaving group when we
5091 reach the last stmt in the group. */
e14c1050
IR
5092 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5093 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5094 && !slp)
5095 {
5096 *vec_stmt = NULL;
5097 return true;
5098 }
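      /* For instance, for an interleaving group of four scalar stores the
         calls for the first three group members return immediately with
         *VEC_STMT set to NULL; only the call that reaches the fourth
         (last) member emits the vector stores for the whole group.  */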

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
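
  /* E.g. with four elements per vector OFFSET is -3: for a negative step
     the data-ref pointer is moved down so that it addresses the
     lowest-addressed element of the group of four slots that the first
     (reversed) vector store will write.  */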

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
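
  /* In the VEC_PERM_EXPR masks above, two eight-element input vectors are
     being interleaved: indices 0-7 select elements of the first operand
     and indices 8-15 elements of the second, so VS5/VS6 pair up
     corresponding elements of vx0 and vx3.  */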

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest, NULL);

                  /* Generate the permute statement.  */
                  gimple perm_stmt
                    = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
                                                    vec_oprnd, vec_oprnd,
                                                    perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
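
/* A sketch of a caller (illustrative, not code from this file): an
   even-element extraction from two V4SI inputs could be requested as

     unsigned char sel[4] = { 0, 2, 4, 6 };
     tree mask = vect_gen_perm_mask (vectype, sel);

   where indices 4-7 refer to elements of the second input vector, and
   MASK is NULL if the target cannot perform that permutation.  */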

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                            x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load? */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
    }

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported.\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
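
      /* For instance: with a two-element VECTYPE and a four-element offset
         vector, MODIFIER is WIDEN and SEL is {2, 3, 2, 3}, so odd copies
         gather using the high half of the offset vector.  With an
         eight-element VECTYPE and a four-element offset vector, MODIFIER
         is NARROW and SEL is {0, 1, 2, 3, 8, 9, 10, 11}, which
         concatenates the low halves of two gather results into one
         vector.  */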

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
      mask = build_vector_from_val (masktype, mask);
      mask = vect_init_vector (stmt, mask, masktype, NULL);
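
      /* Note that the float case above builds the all-ones *bit pattern*
         via real_from_target rather than a numeric constant: presumably
         the gather builtin only inspects the per-element bits (e.g. the
         element sign bit on x86), so an element with every bit set means
         "unconditionally enabled" regardless of the element type.  */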

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
       */
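
      /* As a concrete instance: with stride 3 and a four-element vectype,
         each copy loads array[j], array[j + 3], array[j + 6] and
         array[j + 9] and assembles them into one vector CONSTRUCTOR,
         while the induction variable J advances by 12 elements per
         vector iteration.  */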

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
                                                   running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }

  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ??? But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ??? With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
            slp_perm = true;
          group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
      else
        {
          vec_num = group_size;
          group_gap = 0;
        }
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
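
  /* Here floor(p) denotes P rounded down to the nearest vector-size
     boundary; the code below materializes it as a BIT_AND_EXPR of the
     pointer with -TYPE_ALIGN_UNIT (vectype).  */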

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr)))
              && (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
        }
      else if (dataref_offset)
        dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                          TYPE_SIZE_UNIT (aggr_type));
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain.create (vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = build2 (MEM_REF, vectype, dataref_ptr,
                                dataref_offset
                                ? dataref_offset
                                : build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        align = TYPE_ALIGN_UNIT (elem_type);
                        misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    if (dataref_offset == NULL_TREE)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;
                    tree vs_minus_1;

                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    ptr = copy_ssa_name (dataref_ptr, NULL);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, ptr, dataref_ptr,
                                  build_int_cst
                                    (TREE_TYPE (dataref_ptr),
                                     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs_minus_1,
                                       TYPE_SIZE_UNIT (elem_type));
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
                                  build_int_cst
                                    (TREE_TYPE (ptr),
                                     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (dataref_ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  new_temp = copy_ssa_name (dataref_ptr, NULL);
                  new_stmt = gimple_build_assign_with_ops
                               (BIT_AND_EXPR, new_temp, dataref_ptr,
                                build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                    Create in loop:
                      vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt
                    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
                                                    vec_dest, msq, lsq,
                                                    realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gimple_stmt_iterator gsi2 = *gsi;
                  gcc_assert (!grouped_load);
                  gsi_next (&gsi2);
                  new_temp = vect_init_vector (stmt, scalar_dest,
                                               vectype, &gsi2);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap.  */
          if (slp && group_gap != 0)
            {
              tree bump = size_binop (MULT_EXPR,
                                      TYPE_SIZE_UNIT (elem_type),
                                      size_int (group_gap));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
                     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
                                 &lhs_def_stmt, &def, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
                                 &rhs_def_stmt, &def, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
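
/* Note that when both operands of COND are constants, *COMP_VECTYPE is
   left as NULL_TREE; the caller below therefore re-checks COMP_VECTYPE
   and rejects such conditions.  */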
6458
6459/* vectorizable_condition.
6460
b8698a0f
L
6461 Check if STMT is conditional modify expression that can be vectorized.
6462 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6463 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
6464 at GSI.
6465
6466 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6467 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6468 else caluse if it is 2).
ebfd146a
IR
6469
6470 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6471
4bbe8262 6472bool
ebfd146a 6473vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
6474 gimple *vec_stmt, tree reduc_def, int reduc_index,
6475 slp_tree slp_node)
ebfd146a
IR
6476{
6477 tree scalar_dest = NULL_TREE;
6478 tree vec_dest = NULL_TREE;
ebfd146a
IR
6479 tree cond_expr, then_clause, else_clause;
6480 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6481 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 6482 tree comp_vectype = NULL_TREE;
ff802fa1
IR
6483 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6484 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
6485 tree vec_compare, vec_cond_expr;
6486 tree new_temp;
6487 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 6488 tree def;
a855b1b1 6489 enum vect_def_type dt, dts[4];
ebfd146a 6490 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 6491 int ncopies;
ebfd146a 6492 enum tree_code code;
a855b1b1 6493 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
6494 int i, j;
6495 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
6496 vec<tree> vec_oprnds0 = vNULL;
6497 vec<tree> vec_oprnds1 = vNULL;
6498 vec<tree> vec_oprnds2 = vNULL;
6499 vec<tree> vec_oprnds3 = vNULL;
74946978 6500 tree vec_cmp_type;
b8698a0f 6501
f7e531cf
IR
6502 if (slp_node || PURE_SLP_STMT (stmt_info))
6503 ncopies = 1;
6504 else
6505 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  /* Is this a vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
                            &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be a signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;
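
  /* Concrete illustration: for a V4SF vectype the element precision is 32,
     so cmp_type is a 32-bit signed integer type and vec_cmp_type the
     corresponding V4SI type -- the mask type the vector compare produces.  */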

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              stack_vec<tree, 4> ops;
              stack_vec<vec<tree>, 4> vec_defs;

              ops.safe_push (TREE_OPERAND (cond_expr, 0));
              ops.safe_push (TREE_OPERAND (cond_expr, 1));
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                                         vec_oprnds0.pop ());
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                                         vec_oprnds1.pop ());
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = vec_oprnds1[i];
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}


/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; there we don't analyze pattern stmts instead, since the
     pattern stmts are already part of the SLP instance.  */
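
  /* Hypothetical illustration: pattern recognition may have replaced

       S1: a_T = (TYPE) a_t;
       S2: b_T = (TYPE) b_t;
       S3: prod_T = a_T * b_T;

     by a single widening-multiply pattern statement prod_T = a_t w* b_t.
     When S1-S3 are not themselves relevant, that pattern statement is the
     one analyzed below.  */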

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze the def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                  dump_printf (MSG_NOTE, "\n");
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                                || relevance == vect_used_in_outer_by_reduction
                                || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
          || vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
              || vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e., used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  return true;
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
        is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info)
             == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                free_stmt_vec_info (gsi_stmt (si));
            }
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */
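
/* For example (illustrative): for a 4-byte 'int' scalar type and SIZE 16
   this yields a four-element integer vector type (V4SImode on many
   targets); with SIZE == 0 the target's preferred SIMD mode is used.  */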
7272
bb67d9c7
RG
7273static tree
7274get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a
IR
7275{
7276 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
cc4b5170 7277 enum machine_mode simd_mode;
2f816591 7278 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
7279 int nunits;
7280 tree vectype;
7281
cc4b5170 7282 if (nbytes == 0)
ebfd146a
IR
7283 return NULL_TREE;
7284
48f2e373
RB
7285 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7286 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7287 return NULL_TREE;
7288
7b7b1813
RG
7289 /* For vector types of elements whose mode precision doesn't
7290 match their types precision we use a element type of mode
7291 precision. The vectorization routines will have to make sure
48f2e373
RB
7292 they support the proper result truncation/extension.
7293 We also make sure to build vector types with INTEGER_TYPE
7294 component type only. */
6d7971b8 7295 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
7296 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7297 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
7298 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7299 TYPE_UNSIGNED (scalar_type));
6d7971b8 7300
ccbf5bb4
RG
7301 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7302 When the component mode passes the above test simply use a type
7303 corresponding to that mode. The theory is that any use that
7304 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 7305 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 7306 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
7307 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7308
7309 /* We can't build a vector type of elements with alignment bigger than
7310 their size. */
dfc2e2ac 7311 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
7312 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7313 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 7314
dfc2e2ac
RB
7315 /* If we felt back to using the mode fail if there was
7316 no scalar type for it. */
7317 if (scalar_type == NULL_TREE)
7318 return NULL_TREE;
7319
bb67d9c7
RG
7320 /* If no size was supplied use the mode the target prefers. Otherwise
7321 lookup a vector mode of the specified size. */
7322 if (size == 0)
7323 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7324 else
7325 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
7326 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7327 if (nunits <= 1)
7328 return NULL_TREE;
ebfd146a
IR
7329
7330 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
7331
7332 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7333 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 7334 return NULL_TREE;
ebfd146a
IR
7335
7336 return vectype;
7337}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
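
/* Illustrative classification (names hypothetical): given a loop statement
   x_1 = y_2 + 3, the constant 3 is vect_constant_def, a y_2 defined before
   the loop is vect_external_def, and a y_2 defined by another statement
   inside the loop is vect_internal_def.  */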

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no def_stmt.\n");
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use, but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
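
/* Illustrative case: widening a V16QI input to a V4SI output takes one
   intermediate V8HI step, so MULTI_STEP_CVT is 1, INTERM_TYPES holds the
   intermediate short vector type, and each step uses a LO/HI pair of
   vector codes.  */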

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
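
/* Illustrative case: narrowing a V4SI input to a V16QI output goes through
   one intermediate V8HI step, each step using VEC_PACK_TRUNC_EXPR, so
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type.  */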

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
ebfd146a 7906}