/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "builtins.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
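
/* As an illustrative sketch (the cost vector and STMT_INFO come from the
   caller), a caller costing two copies of an unaligned store into the
   body cost vector would do:

     inside_cost += record_stmt_cost (body_cost_vec, 2, unaligned_store,
                                      stmt_info, DR_MISALIGNMENT (dr),
                                      vect_body);

   With a NULL BODY_COST_VEC the cost is instead passed straight to the
   target via add_stmt_cost.  */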

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
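
/* For example, in the hypothetical loop

     for (i = 0; i < n; i++)
       s_1 = s_0 + a[i];
     res = s_1;

   the stmt computing s_1 has a use in the loop-closed exit phi feeding
   RES, so *LIVE_P is set, while a store 'a[i] = x' would instead be
   marked vect_used_in_scope because it has a vdef.  */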

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
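
/* For instance, in 'a[i_1] = x_2' (the first form above) the stored value
   x_2 is a non-indexing operand, so this returns true for x_2, while i_1
   only indexes the array and so gets false.  */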


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
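
/* For instance (assuming a unit vector_stmt cost from the target), with
   NCOPIES = 4 and one constant operand (dt[0] == vect_constant_def) the
   above records prologue_cost = 1 and inside_cost = 4.  */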


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
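
/* Worked example (assuming a unit vec_promote_demote cost): a two-step
   demotion (PWR = 1) counts vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3
   operations, while a two-step promotion counts
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6, since each promotion step
   doubles the number of vectors produced.  */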

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
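
/* E.g. for a group of three stores S1, S2, S3 with S1 the first element,
   this returns 3 for S1 and 1 for S2 and S3, so the group-wide overhead
   is attributed exactly once.  */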


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

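  /* For example, a grouped store with GROUP_SIZE = 4 and NCOPIES = 2
     implemented by interleaving needs
     NSTMTS = 2 * ceil_log2 (4) * 4 = 16 vec_perm operations.  */
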
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

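/* To illustrate the two realignment schemes (unit costs assumed): with
   NCOPIES = 4, dr_explicit_realign_optimized charges 2 vector_stmts
   (3 with a mask-for-load builtin) once in the prologue plus one
   vector_load and one vec_perm per copy in the body, whereas
   dr_explicit_realign charges 2 vector_loads and a vec_perm for every
   copy.  */
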
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
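
/* A minimal usage sketch (VECTYPE comes from the caller's stmt_info): to
   splat the scalar constant 1 across a vector in the loop preheader one
   could write

     tree one = build_one_cst (TREE_TYPE (vectype));
     tree vec_one = vect_init_vector (stmt, one, vectype, NULL);

   which emits 'cst_1 = { 1, 1, ..., 1 };' on the preheader edge and
   returns the new SSA name.  */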


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
1520
1521
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
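
/* A minimal usage sketch of the above (illustrative names: DEF0 is the
   vector def created for the first copy, NCOPIES the number of copies
   required).  The remaining defs are obtained by walking the
   STMT_VINFO_RELATED_STMT chain one step per call:

     tree def = def0;
     for (int k = 1; k < ncopies; k++)
       def = vect_get_vec_def_for_stmt_copy (dt, def);

   After iteration k, DEF holds the def produced by copy k.  */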


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
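
/* Usage sketch (illustrative): for a loop-based binary stmt the caller
   typically does

     vec<tree> oprnds0 = vNULL, oprnds1 = vNULL;
     vect_get_vec_defs (op0, op1, stmt, &oprnds0, &oprnds1, NULL, -1);

   and receives one def per operand; with a non-NULL SLP_NODE the vectors
   instead hold one def per SLP member, taken from vect_get_slp_defs.  */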


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
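
/* To illustrate the virtual-operand fixup above (hypothetical SSA names):
   inserting a vectorized store before a scalar stmt that uses .MEM_5
   gives the new store vuse .MEM_5 and a fresh vdef .MEM_7, and rewires
   the scalar stmt to use .MEM_7 instead, keeping virtual SSA form valid
   without a renamer run.  */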

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
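
/* For example (hypothetical target): a call to a const math builtin with
   a V2DF result and V2DF argument is handed to
   targetm.vectorize.builtin_vectorized_function, which either returns the
   decl of a matching vector builtin or NULL_TREE if the target provides
   none.  */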


static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);


/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
                           &def_stmt, &def, &dt))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
                               &def_stmt, &def, &dt))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
          for (i = 0; i < nunits; ++i)
            sel[i] = i | gather_off_nunits;
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
        }
      else
        gcc_unreachable ();
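
      /* A worked example of the masks above (illustrative sizes): with
         nunits == 4 and gather_off_nunits == 8 (WIDEN), sel is
         { 4, 5, 6, 7, 4, 5, 6, 7 }; with nunits == 8 and
         gather_off_nunits == 4 (NARROW), the first loop yields
         { 0, 1, 2, 3, 8, 9, 10, 11 } and the mask permutation is
         { 4, 5, 6, 7, 4, 5, 6, 7 }.  */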

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
              else
                {
                  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_vect_var (masktype, vect_simple_var,
                                               NULL);
                  var = make_ssa_name (var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);
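          /* Note the argument order matches the prototype recovered above:
             (src, ptr, idx, mask, scale).  MASK_OP is passed both as SRC
             and as MASK, presumably so that inactive lanes still have a
             well-defined value (the srctype/masktype compatibility is
             asserted earlier).  */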

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
                                  &def, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
                                  &def, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
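          /* Worked example (illustrative numbers, typical target): for a
             16-byte V4SF access ALIGN starts as 16; a known misalignment
             of 8 bytes gives MISALIGN = 8, while an unknown one
             (DR_MISALIGNMENT == -1) falls back to the 4-byte element
             alignment with MISALIGN = 0.  */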
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          gimple_call_arg (stmt, 1),
                                          vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
                                  &def, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                          gimple_call_arg (stmt, 1),
                                          vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}


/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
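
  /* Illustrative sizes: V4SI arguments with a V8HI result give
     nunits_in == 4 and nunits_out == 8, hence NARROW (two input vectors
     are consumed per output vector); the converse gives WIDEN.  */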

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_call ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
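              /* E.g. (illustrative), with nunits_out == 4 the copy j == 1
                 gets the constant vector { 4, 5, 6, 7 }.  */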
              tree new_var
                = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
              gimple init_stmt = gimple_build_assign (new_var, cst);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp,
                                              gimple_assign_lhs (init_stmt));
            }
          else
            {
              new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }
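
          /* Each scalar operand thus contributes two vector defs per
             narrowing call: with ncopies == 2 and one operand, copy
             j == 0 consumes (vx.0, vx.1) and copy j == 1 consumes
             (vx.2, vx.3).  */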

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
};

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp, def;
  gimple def_stmt;
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &thisarginfo.dt,
                                 &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          arginfo.release ();
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
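          /* A worked example (illustrative numbers): if peeling removed
             n1 - n2 == 3 scalar iterations and the argument advances by
             STEP == 4 per iteration, the saved base must be advanced by
             BIAS * STEP == 12 before it is used below.  */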
          if (n1 != n2)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;

      arginfo.quick_push (thisarginfo);
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }
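
  /* To illustrate the scoring above (hypothetical numbers): with a
     vectorization factor of 8, a clone with simdlen 4 starts at
     (log2 8 - log2 4) * 1024 == 1024, an inbranch clone adds 2048, and a
     target-reported badness of 1 adds 512; the clone with the lowest
     total wins.  */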

  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          {
            arginfo.release ();
            return false;
          }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if (bestn->simdclone->args[i].arg_type
            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt, NULL);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  size_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt, NULL);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
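                  /* The latch increment covers one full vectorized
                     iteration; e.g. (illustrative), linear_step 4 with
                     ncopies 2 and nunits 8 advances the PHI by
                     4 * 16 == 64 per loop iteration.  */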
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                size_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber), gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}

ebfd146a
IR
3250/* Function vect_gen_widened_results_half
3251
3252 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 3253 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3254 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
3255 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3256 needs to be created (DECL is a function-decl of a target-builtin).
3257 STMT is the original scalar stmt that we are vectorizing. */
3258
3259static gimple
3260vect_gen_widened_results_half (enum tree_code code,
3261 tree decl,
3262 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3263 tree vec_dest, gimple_stmt_iterator *gsi,
3264 gimple stmt)
b8698a0f 3265{
ebfd146a 3266 gimple new_stmt;
b8698a0f
L
3267 tree new_temp;
3268
3269 /* Generate half of the widened result: */
3270 if (code == CALL_EXPR)
3271 {
3272 /* Target specific support */
ebfd146a
IR
3273 if (op_type == binary_op)
3274 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3275 else
3276 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3277 new_temp = make_ssa_name (vec_dest, new_stmt);
3278 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3279 }
3280 else
ebfd146a 3281 {
b8698a0f
L
3282 /* Generic support */
3283 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3284 if (op_type != binary_op)
3285 vec_oprnd1 = NULL;
0d0e4a03 3286 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3287 new_temp = make_ssa_name (vec_dest, new_stmt);
3288 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3289 }
ebfd146a
IR
3290 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3291
ebfd146a
IR
3292 return new_stmt;
3293}
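
/* As a sketch of how the two halves fit together (hypothetical GIMPLE,
   assuming a target that widens V8HI to V4SI): a widening multiply with
   CODE1 = VEC_WIDEN_MULT_LO_EXPR and CODE2 = VEC_WIDEN_MULT_HI_EXPR is
   emitted as

     vz_lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;
     vz_hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;

   with vect_gen_widened_results_half called once per half.  */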

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get the first vector operand.  All the vector operands except the very
     first one (that is the scalar oprnd) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get the second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
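
/* A note on the operand count (an observation about the code above, not
   from the original sources): each invocation pushes two defs and then
   recurses, so a call with MULTI_STEP_CVT = N collects 2 * (N + 1) vector
   defs.  The NARROW path of vectorizable_conversion passes
   vect_pow2 (multi_step_cvt) - 1, i.e. it gathers
   2 * vect_pow2 (multi_step_cvt) defs, exactly what a chain of pairwise
   pack operations consumes.  */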

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for the next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in the vector info of the scalar statement
	     (or in the STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
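
/* Sketch of a two-step demotion (hypothetical GIMPLE, assuming V4SI, V8HI
   and V16QI are all supported): converting int to char with VF = 16 first
   packs four V4SI into two V8HI, then those into one V16QI:

     vh_0 = VEC_PACK_TRUNC_EXPR <vi_0, vi_1>;
     vh_1 = VEC_PACK_TRUNC_EXPR <vi_2, vi_3>;
     vc_0 = VEC_PACK_TRUNC_EXPR <vh_0, vh_1>;

   which is what the recursion above produces with MULTI_STEP_CVT = 1.  */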

/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of the promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
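
/* For instance (an illustration, not from the original sources): with one
   V8HI input vector and CODE1/CODE2 being the lo/hi widening codes, the
   loop above replaces the single entry of VEC_OPRNDS0 with two V4SI
   results, doubling the vector count at every promotion step; the WIDEN
   case of vectorizable_conversion then either runs the next promotion
   step on the doubled set or applies the final conversion.  */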

/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant def use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

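  /* As a concrete example (illustrative only): with VF = 8, a short -> int
     conversion has vectype_in V8HI (nunits_in = 8) and vectype_out V4SI
     (nunits_out = 4), so modifier = WIDEN and ncopies = 8 / 8 = 1, with
     each copy producing two V4SI results.  An int -> short conversion is
     the NARROW mirror image: ncopies = VF / nunits_out, and each copy
     consumes two V4SI inputs.  */
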
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
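
  /* To make the WIDEN fallback concrete (an illustration, assuming a
     target that widens V8HI to V4SI and converts V4SI to V4SF): for
     float_dest = (float) short_src the loop above finds cvt_type = int,
     so the conversion is emitted as a widening step V8HI -> 2x V4SI
     (CODE1/CODE2) followed by an int -> float convert per half
     (CODECVT1).  */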

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (2 * (multi_step_cvt
				 ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,
						      vop0);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,
						    vop0);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is this a vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
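
/* Worth noting (an illustration, not from the original sources): because a
   same-size nop conversion cannot change the vector layout in a way the
   target distinguishes, the loop above reuses a single pattern; e.g. an
   int -> unsigned int copy of a V4SI operand becomes

     vdest = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vop);

   so one code path serves plain copies, PAREN_EXPR and nop conversions.  */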

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
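
/* Usage sketch (a hypothetical caller, not from the original sources):
   pattern recognizers can use this predicate before synthesizing a shift,
   e.g.

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ... emit the shift-based pattern ...

   without committing to whether the shift amount ends up scalar or
   vector; vectorizable_shift makes that choice later.  */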

/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
		    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			     &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same in
	 all the scalar stmts; in loops, if it is a constant or invariant,
	 it is always a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }
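
  /* For example (illustrative only): in a loop where every iteration does
     a[i] = b[i] << s with invariant s, SCALAR_SHIFT_ARG stays true and the
     shift amount can feed a vector-by-scalar pattern directly, whereas
     a[i] = b[i] << c[i] makes dt[1] == vect_internal_def and forces the
     vector/vector optab below.  */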

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary and ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exceptions are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
	icode = LAST_INSN_CODE;
      else
	icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  if (op_type == ternary_op)
	    {
	      vec_oprnds2.create (1);
	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
								    stmt,
								    NULL));
	    }
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
4943
c716e67f
XDL
4944/* A helper function to ensure data reference DR's base alignment
4945 for STMT_INFO. */
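/* E.g. a base decl laid out with smaller alignment has its alignment
 raised to TYPE_ALIGN of the vector type, so that accesses relative to
 it can be emitted as aligned vector loads and stores. */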
4946
4947static void
4948ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4949{
4950 if (!dr->aux)
4951 return;
4952
4953 if (((dataref_aux *)dr->aux)->base_misaligned)
4954 {
4955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4956 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4957
428f0c67
JH
4958 if (decl_in_symtab_p (base_decl))
4959 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4960 else
4961 {
4962 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4963 DECL_USER_ALIGN (base_decl) = 1;
4964 }
c716e67f
XDL
4965 ((dataref_aux *)dr->aux)->base_misaligned = false;
4966 }
4967}
4968
ebfd146a 4969
09dfa495
BM
4970/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4971 reversal of the vector elements. If that is impossible to do,
4972 returns NULL. */
4973
4974static tree
4975perm_mask_for_reverse (tree vectype)
4976{
4977 int i, nunits;
4978 unsigned char *sel;
4979
4980 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4981 sel = XALLOCAVEC (unsigned char, nunits);
4982
4983 for (i = 0; i < nunits; ++i)
4984 sel[i] = nunits - 1 - i;
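 /* E.g. for NUNITS == 4 this requests the permutation {3, 2, 1, 0},
 i.e. element I of the result is element NUNITS - 1 - I of the
 input. */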
4985
557be5a8
AL
4986 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4987 return NULL_TREE;
4988 return vect_gen_perm_mask_checked (vectype, sel);
09dfa495
BM
4989}
4990
ebfd146a
IR
4991/* Function vectorizable_store.
4992
b8698a0f
L
4993 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4994 can be vectorized.
4995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4996 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4998
4999static bool
5000vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 5001 slp_tree slp_node)
ebfd146a
IR
5002{
5003 tree scalar_dest;
5004 tree data_ref;
5005 tree op;
5006 tree vec_oprnd = NULL_TREE;
5007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5008 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5009 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5010 tree elem_type;
ebfd146a 5011 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5012 struct loop *loop = NULL;
ef4bddc2 5013 machine_mode vec_mode;
ebfd146a
IR
5014 tree dummy;
5015 enum dr_alignment_support alignment_support_scheme;
5016 tree def;
5017 gimple def_stmt;
5018 enum vect_def_type dt;
5019 stmt_vec_info prev_stmt_info = NULL;
5020 tree dataref_ptr = NULL_TREE;
74bf76ed 5021 tree dataref_offset = NULL_TREE;
fef4d2b3 5022 gimple ptr_incr = NULL;
f2e2a985 5023 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
5024 int ncopies;
5025 int j;
5026 gimple next_stmt, first_stmt = NULL;
0d0293ac 5027 bool grouped_store = false;
272c6793 5028 bool store_lanes_p = false;
ebfd146a 5029 unsigned int group_size, i;
6e1aa848
DN
5030 vec<tree> dr_chain = vNULL;
5031 vec<tree> oprnds = vNULL;
5032 vec<tree> result_chain = vNULL;
ebfd146a 5033 bool inv_p;
09dfa495
BM
5034 bool negative = false;
5035 tree offset = NULL_TREE;
6e1aa848 5036 vec<tree> vec_oprnds = vNULL;
ebfd146a 5037 bool slp = (slp_node != NULL);
ebfd146a 5038 unsigned int vec_num;
a70d6342 5039 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 5040 tree aggr_type;
a70d6342
IR
5041
5042 if (loop_vinfo)
5043 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
5044
5045 /* Multiple types in SLP are handled by creating the appropriate number of
5046 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5047 case of SLP. */
437f4a00 5048 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5049 ncopies = 1;
5050 else
5051 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5052
5053 gcc_assert (ncopies >= 1);
5054
5055 /* FORNOW. This restriction should be relaxed. */
a70d6342 5056 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 5057 {
73fbfcad 5058 if (dump_enabled_p ())
78c60e3d 5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5060 "multiple types in nested loop.\n");
ebfd146a
IR
5061 return false;
5062 }
5063
a70d6342 5064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5065 return false;
5066
8644a673 5067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5068 return false;
5069
5070 /* Is vectorizable store? */
5071
5072 if (!is_gimple_assign (stmt))
5073 return false;
5074
5075 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5076 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5077 && is_pattern_stmt_p (stmt_info))
5078 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5079 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5080 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5081 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5082 && TREE_CODE (scalar_dest) != COMPONENT_REF
5083 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5084 && TREE_CODE (scalar_dest) != REALPART_EXPR
5085 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5086 return false;
5087
5088 gcc_assert (gimple_assign_single_p (stmt));
5089 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
5090 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5091 &def, &dt))
ebfd146a 5092 {
73fbfcad 5093 if (dump_enabled_p ())
78c60e3d 5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5095 "use not simple.\n");
ebfd146a
IR
5096 return false;
5097 }
5098
272c6793 5099 elem_type = TREE_TYPE (vectype);
ebfd146a 5100 vec_mode = TYPE_MODE (vectype);
7b7b1813 5101
ebfd146a
IR
5102 /* FORNOW. In some cases can vectorize even if data-type not supported
5103 (e.g. - array initialization with 0). */
947131ba 5104 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5105 return false;
5106
5107 if (!STMT_VINFO_DATA_REF (stmt_info))
5108 return false;
5109
f2e2a985 5110 if (!STMT_VINFO_STRIDED_P (stmt_info))
09dfa495 5111 {
f2e2a985
MM
5112 negative =
5113 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5114 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5115 size_zero_node) < 0;
5116 if (negative && ncopies > 1)
09dfa495
BM
5117 {
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f2e2a985 5120 "multiple types with negative step.\n");
09dfa495
BM
5121 return false;
5122 }
f2e2a985 5123 if (negative)
09dfa495 5124 {
f2e2a985
MM
5125 gcc_assert (!grouped_store);
5126 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5127 if (alignment_support_scheme != dr_aligned
5128 && alignment_support_scheme != dr_unaligned_supported)
5129 {
5130 if (dump_enabled_p ())
5131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5132 "negative step but alignment required.\n");
5133 return false;
5134 }
5135 if (dt != vect_constant_def
5136 && dt != vect_external_def
5137 && !perm_mask_for_reverse (vectype))
5138 {
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "negative step and reversing not supported.\n");
5142 return false;
5143 }
09dfa495
BM
5144 }
5145 }
5146
0d0293ac 5147 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5148 {
0d0293ac 5149 grouped_store = true;
e14c1050 5150 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
cee62fee
MM
5151 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5152 if (!slp
5153 && !PURE_SLP_STMT (stmt_info)
5154 && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 5155 {
272c6793
RS
5156 if (vect_store_lanes_supported (vectype, group_size))
5157 store_lanes_p = true;
0d0293ac 5158 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
5159 return false;
5160 }
b8698a0f 5161
cee62fee
MM
5162 if (STMT_VINFO_STRIDED_P (stmt_info)
5163 && (slp || PURE_SLP_STMT (stmt_info))
5164 && (group_size > nunits
5165 || nunits % group_size != 0))
5166 {
5167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5168 "unhandled strided group store\n");
5169 return false;
5170 }
5171
ebfd146a
IR
5172 if (first_stmt == stmt)
5173 {
5174 /* STMT is the leader of the group. Check the operands of all the
5175 stmts of the group. */
e14c1050 5176 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
5177 while (next_stmt)
5178 {
5179 gcc_assert (gimple_assign_single_p (next_stmt));
5180 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
5181 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5182 &def_stmt, &def, &dt))
ebfd146a 5183 {
73fbfcad 5184 if (dump_enabled_p ())
78c60e3d 5185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5186 "use not simple.\n");
ebfd146a
IR
5187 return false;
5188 }
e14c1050 5189 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5190 }
5191 }
5192 }
5193
5194 if (!vec_stmt) /* transformation not required. */
5195 {
5196 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5197 /* The SLP costs are calculated during SLP analysis. */
5198 if (!PURE_SLP_STMT (stmt_info))
5199 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5200 NULL, NULL, NULL);
ebfd146a
IR
5201 return true;
5202 }
5203
5204 /** Transform. **/
5205
c716e67f
XDL
5206 ensure_base_align (stmt_info, dr);
5207
0d0293ac 5208 if (grouped_store)
ebfd146a
IR
5209 {
5210 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5211 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5212
e14c1050 5213 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5214
5215 /* FORNOW */
a70d6342 5216 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5217
5218 /* We vectorize all the stmts of the interleaving group when we
5219 reach the last stmt in the group. */
e14c1050
IR
5220 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5221 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5222 && !slp)
5223 {
5224 *vec_stmt = NULL;
5225 return true;
5226 }
5227
5228 if (slp)
4b5caab7 5229 {
0d0293ac 5230 grouped_store = false;
4b5caab7
IR
5231 /* VEC_NUM is the number of vect stmts to be created for this
5232 group. */
5233 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5234 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 5235 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5236 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5237 }
ebfd146a 5238 else
4b5caab7
IR
5239 /* VEC_NUM is the number of vect stmts to be created for this
5240 group. */
ebfd146a
IR
5241 vec_num = group_size;
5242 }
b8698a0f 5243 else
ebfd146a
IR
5244 {
5245 first_stmt = stmt;
5246 first_dr = dr;
5247 group_size = vec_num = 1;
ebfd146a 5248 }
b8698a0f 5249
73fbfcad 5250 if (dump_enabled_p ())
78c60e3d 5251 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5252 "transform store. ncopies = %d\n", ncopies);
ebfd146a 5253
f2e2a985
MM
5254 if (STMT_VINFO_STRIDED_P (stmt_info))
5255 {
5256 gimple_stmt_iterator incr_gsi;
5257 bool insert_after;
5258 gimple incr;
5259 tree offvar;
5260 tree ivstep;
5261 tree running_off;
5262 gimple_seq stmts = NULL;
5263 tree stride_base, stride_step, alias_off;
5264 tree vec_oprnd;
f502d50e 5265 unsigned int g;
f2e2a985
MM
5266
5267 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5268
5269 stride_base
5270 = fold_build_pointer_plus
f502d50e 5271 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 5272 size_binop (PLUS_EXPR,
f502d50e
MM
5273 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5274 convert_to_ptrofftype (DR_INIT (first_dr))));
5275 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
5276
5277 /* For a store with loop-invariant (but other than power-of-2)
5278 stride (i.e. not a grouped access) like so:
5279
5280 for (i = 0; i < n; i += stride)
5281 array[i] = ...;
5282
5283 we generate a new induction variable and new stores from
5284 the components of the (vectorized) rhs:
5285
5286 for (j = 0; ; j += VF*stride)
5287 vectemp = ...;
5288 tmp1 = vectemp[0];
5289 array[j] = tmp1;
5290 tmp2 = vectemp[1];
5291 array[j + stride] = tmp2;
5292 ...
5293 */
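
 /* E.g. with NUNITS == 4 and no SLP, each copy stores the four
 components of VECTEMP at array[j], array[j + stride],
 array[j + 2*stride] and array[j + 3*stride], and J advances by
 4*stride per vector iteration. */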
5294
cee62fee
MM
5295 unsigned nstores = nunits;
5296 tree ltype = elem_type;
5297 if (slp)
5298 {
5299 nstores = nunits / group_size;
5300 if (group_size < nunits)
5301 ltype = build_vector_type (elem_type, group_size);
5302 else
5303 ltype = vectype;
5304 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5305 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
f502d50e 5306 group_size = 1;
cee62fee
MM
5307 }
5308
f2e2a985
MM
5309 ivstep = stride_step;
5310 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5311 build_int_cst (TREE_TYPE (ivstep),
cee62fee 5312 ncopies * nstores));
f2e2a985
MM
5313
5314 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5315
5316 create_iv (stride_base, ivstep, NULL,
5317 loop, &incr_gsi, insert_after,
5318 &offvar, NULL);
5319 incr = gsi_stmt (incr_gsi);
5320 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5321
5322 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5323 if (stmts)
5324 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5325
5326 prev_stmt_info = NULL;
f502d50e
MM
5327 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5328 next_stmt = first_stmt;
5329 for (g = 0; g < group_size; g++)
f2e2a985 5330 {
f502d50e
MM
5331 running_off = offvar;
5332 if (g)
f2e2a985 5333 {
f502d50e
MM
5334 tree size = TYPE_SIZE_UNIT (ltype);
5335 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 5336 size);
f502d50e 5337 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 5338 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 5339 running_off, pos);
f2e2a985 5340 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 5341 running_off = newoff;
f502d50e
MM
5342 }
5343 for (j = 0; j < ncopies; j++)
5344 {
5345 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5346 and first_stmt == stmt. */
5347 if (j == 0)
5348 {
5349 if (slp)
5350 {
5351 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5352 slp_node, -1);
5353 vec_oprnd = vec_oprnds[0];
5354 }
5355 else
5356 {
5357 gcc_assert (gimple_assign_single_p (next_stmt));
5358 op = gimple_assign_rhs1 (next_stmt);
5359 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5360 NULL);
5361 }
5362 }
f2e2a985 5363 else
f502d50e
MM
5364 {
5365 if (slp)
5366 vec_oprnd = vec_oprnds[j];
5367 else
5368 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5369 }
5370
5371 for (i = 0; i < nstores; i++)
5372 {
5373 tree newref, newoff;
5374 gimple incr, assign;
5375 tree size = TYPE_SIZE (ltype);
5376 /* Extract the i'th component. */
5377 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5378 bitsize_int (i), size);
5379 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5380 size, pos);
5381
5382 elem = force_gimple_operand_gsi (gsi, elem, true,
5383 NULL_TREE, true,
5384 GSI_SAME_STMT);
5385
5386 newref = build2 (MEM_REF, ltype,
5387 running_off, alias_off);
5388
5389 /* And store it to *running_off. */
5390 assign = gimple_build_assign (newref, elem);
5391 vect_finish_stmt_generation (stmt, assign, gsi);
5392
5393 newoff = copy_ssa_name (running_off, NULL);
5394 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5395 running_off, stride_step);
5396 vect_finish_stmt_generation (stmt, incr, gsi);
5397
5398 running_off = newoff;
5399 if (g == group_size - 1)
5400 {
5401 if (j == 0 && i == 0)
5402 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5403 else
5404 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5405 prev_stmt_info = vinfo_for_stmt (assign);
5406 }
5407 }
f2e2a985 5408 }
f502d50e 5409 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
f2e2a985
MM
5410 }
5411 return true;
5412 }
5413
9771b263
DN
5414 dr_chain.create (group_size);
5415 oprnds.create (group_size);
ebfd146a 5416
720f5239 5417 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 5418 gcc_assert (alignment_support_scheme);
272c6793
RS
5419 /* Targets with store-lane instructions must not require explicit
5420 realignment. */
5421 gcc_assert (!store_lanes_p
5422 || alignment_support_scheme == dr_aligned
5423 || alignment_support_scheme == dr_unaligned_supported);
5424
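 /* With a negative step the vector covering the first scalar access
 also spans the NUNITS - 1 elements below it, so bias the initial
 address back by NUNITS - 1 elements. */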
09dfa495
BM
5425 if (negative)
5426 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5427
272c6793
RS
5428 if (store_lanes_p)
5429 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5430 else
5431 aggr_type = vectype;
ebfd146a
IR
5432
5433 /* In case the vectorization factor (VF) is bigger than the number
5434 of elements that we can fit in a vectype (nunits), we have to generate
5435 more than one vector stmt - i.e. - we need to "unroll" the
b8698a0f 5436 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
5437 vect_get_vec_def_for_stmt_copy. */
5438
0d0293ac 5439 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
5440
5441 S1: &base + 2 = x2
5442 S2: &base = x0
5443 S3: &base + 1 = x1
5444 S4: &base + 3 = x3
5445
5446 We create vectorized stores starting from base address (the access of the
5447 first stmt in the chain (S2 in the above example), when the last store stmt
5448 of the chain (S4) is reached:
5449
5450 VS1: &base = vx2
5451 VS2: &base + vec_size*1 = vx0
5452 VS3: &base + vec_size*2 = vx1
5453 VS4: &base + vec_size*3 = vx3
5454
5455 Then permutation statements are generated:
5456
3fcc1b55
JJ
5457 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5458 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 5459 ...
b8698a0f 5460
ebfd146a
IR
5461 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5462 (the order of the data-refs in the output of vect_permute_store_chain
5463 corresponds to the order of scalar stmts in the interleaving chain - see
5464 the documentation of vect_permute_store_chain()).
5465
5466 In case of both multiple types and interleaving, above vector stores and
ff802fa1 5467 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 5468 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 5469 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
5470 */
5471
5472 prev_stmt_info = NULL;
5473 for (j = 0; j < ncopies; j++)
5474 {
5475 gimple new_stmt;
ebfd146a
IR
5476
5477 if (j == 0)
5478 {
5479 if (slp)
5480 {
5481 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
5482 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5483 NULL, slp_node, -1);
ebfd146a 5484
9771b263 5485 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
5486 }
5487 else
5488 {
b8698a0f
L
5489 /* For interleaved stores we collect vectorized defs for all the
5490 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5491 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
5492 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5493
0d0293ac 5494 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 5495 OPRNDS are of size 1. */
b8698a0f 5496 next_stmt = first_stmt;
ebfd146a
IR
5497 for (i = 0; i < group_size; i++)
5498 {
b8698a0f
L
5499 /* Since gaps are not supported for interleaved stores,
5500 GROUP_SIZE is the exact number of stmts in the chain.
5501 Therefore, NEXT_STMT can't be NULL_TREE. In case
5502 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
5503 iteration of the loop will be executed. */
5504 gcc_assert (next_stmt
5505 && gimple_assign_single_p (next_stmt));
5506 op = gimple_assign_rhs1 (next_stmt);
5507
b8698a0f 5508 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 5509 NULL);
9771b263
DN
5510 dr_chain.quick_push (vec_oprnd);
5511 oprnds.quick_push (vec_oprnd);
e14c1050 5512 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5513 }
5514 }
5515
5516 /* We should have caught mismatched types earlier. */
5517 gcc_assert (useless_type_conversion_p (vectype,
5518 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
5519 bool simd_lane_access_p
5520 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5521 if (simd_lane_access_p
5522 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5523 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5524 && integer_zerop (DR_OFFSET (first_dr))
5525 && integer_zerop (DR_INIT (first_dr))
5526 && alias_sets_conflict_p (get_alias_set (aggr_type),
5527 get_alias_set (DR_REF (first_dr))))
5528 {
5529 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5530 dataref_offset = build_int_cst (reference_alias_ptr_type
5531 (DR_REF (first_dr)), 0);
8928eff3 5532 inv_p = false;
74bf76ed
JJ
5533 }
5534 else
5535 dataref_ptr
5536 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5537 simd_lane_access_p ? loop : NULL,
09dfa495 5538 offset, &dummy, gsi, &ptr_incr,
74bf76ed 5539 simd_lane_access_p, &inv_p);
a70d6342 5540 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 5541 }
b8698a0f 5542 else
ebfd146a 5543 {
b8698a0f
L
5544 /* For interleaved stores we created vectorized defs for all the
5545 defs stored in OPRNDS in the previous iteration (previous copy).
5546 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
5547 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5548 next copy.
0d0293ac 5549 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
5550 OPRNDS are of size 1. */
5551 for (i = 0; i < group_size; i++)
5552 {
9771b263 5553 op = oprnds[i];
24ee1384
IR
5554 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5555 &def, &dt);
b8698a0f 5556 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
5557 dr_chain[i] = vec_oprnd;
5558 oprnds[i] = vec_oprnd;
ebfd146a 5559 }
74bf76ed
JJ
5560 if (dataref_offset)
5561 dataref_offset
5562 = int_const_binop (PLUS_EXPR, dataref_offset,
5563 TYPE_SIZE_UNIT (aggr_type));
5564 else
5565 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5566 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
5567 }
5568
272c6793 5569 if (store_lanes_p)
ebfd146a 5570 {
272c6793 5571 tree vec_array;
267d3070 5572
272c6793
RS
5573 /* Combine all the vectors into an array. */
5574 vec_array = create_vector_array (vectype, vec_num);
5575 for (i = 0; i < vec_num; i++)
c2d7ab2a 5576 {
9771b263 5577 vec_oprnd = dr_chain[i];
272c6793 5578 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 5579 }
b8698a0f 5580
272c6793
RS
5581 /* Emit:
5582 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5583 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5584 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5585 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 5586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5587 }
5588 else
5589 {
5590 new_stmt = NULL;
0d0293ac 5591 if (grouped_store)
272c6793 5592 {
b6b9227d
JJ
5593 if (j == 0)
5594 result_chain.create (group_size);
272c6793
RS
5595 /* Permute. */
5596 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5597 &result_chain);
5598 }
c2d7ab2a 5599
272c6793
RS
5600 next_stmt = first_stmt;
5601 for (i = 0; i < vec_num; i++)
5602 {
644ffefd 5603 unsigned align, misalign;
272c6793
RS
5604
5605 if (i > 0)
5606 /* Bump the vector pointer. */
5607 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5608 stmt, NULL_TREE);
5609
5610 if (slp)
9771b263 5611 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
5612 else if (grouped_store)
5613 /* For grouped stores vectorized defs are interleaved in
272c6793 5614 vect_permute_store_chain(). */
9771b263 5615 vec_oprnd = result_chain[i];
272c6793 5616
aed93b23
RB
5617 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5618 dataref_ptr,
5619 dataref_offset
5620 ? dataref_offset
5621 : build_int_cst (reference_alias_ptr_type
5622 (DR_REF (first_dr)), 0));
644ffefd 5623 align = TYPE_ALIGN_UNIT (vectype);
272c6793 5624 if (aligned_access_p (first_dr))
644ffefd 5625 misalign = 0;
272c6793
RS
5626 else if (DR_MISALIGNMENT (first_dr) == -1)
5627 {
5628 TREE_TYPE (data_ref)
5629 = build_aligned_type (TREE_TYPE (data_ref),
5630 TYPE_ALIGN (elem_type));
644ffefd
MJ
5631 align = TYPE_ALIGN_UNIT (elem_type);
5632 misalign = 0;
272c6793
RS
5633 }
5634 else
5635 {
5636 TREE_TYPE (data_ref)
5637 = build_aligned_type (TREE_TYPE (data_ref),
5638 TYPE_ALIGN (elem_type));
644ffefd 5639 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5640 }
aed93b23
RB
5641 if (dataref_offset == NULL_TREE
5642 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
5643 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5644 misalign);
c2d7ab2a 5645
f234d260
BM
5646 if (negative
5647 && dt != vect_constant_def
5648 && dt != vect_external_def)
09dfa495
BM
5649 {
5650 tree perm_mask = perm_mask_for_reverse (vectype);
5651 tree perm_dest
5652 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5653 vectype);
b731b390 5654 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
5655
5656 /* Generate the permute statement. */
5657 gimple perm_stmt
0d0e4a03
JJ
5658 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5659 vec_oprnd, perm_mask);
09dfa495
BM
5660 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5661
5662 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5663 vec_oprnd = new_temp;
5664 }
5665
272c6793
RS
5666 /* Arguments are ready. Create the new vector stmt. */
5667 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5668 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5669
5670 if (slp)
5671 continue;
5672
e14c1050 5673 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
5674 if (!next_stmt)
5675 break;
5676 }
ebfd146a 5677 }
1da0876c
RS
5678 if (!slp)
5679 {
5680 if (j == 0)
5681 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5682 else
5683 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5684 prev_stmt_info = vinfo_for_stmt (new_stmt);
5685 }
ebfd146a
IR
5686 }
5687
9771b263
DN
5688 dr_chain.release ();
5689 oprnds.release ();
5690 result_chain.release ();
5691 vec_oprnds.release ();
ebfd146a
IR
5692
5693 return true;
5694}
5695
557be5a8
AL
5696/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5697 VECTOR_CST mask. No checks are made that the target platform supports the
5698 mask, so callers may wish to test can_vec_perm_p separately, or use
5699 vect_gen_perm_mask_checked. */
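/* E.g. for a 4-element VECTYPE, SEL = {0, 2, 4, 6} yields the
 VECTOR_CST {0, 2, 4, 6}; used as a VEC_PERM_EXPR mask it selects the
 even-indexed elements of both inputs, indices NUNITS and above
 referring to the second input vector. */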
a1e53f3f 5700
3fcc1b55 5701tree
557be5a8 5702vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
a1e53f3f 5703{
d2a12ae7 5704 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 5705 int i, nunits;
a1e53f3f 5706
22e4dee7 5707 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 5708
96f9265a
RG
5709 mask_elt_type = lang_hooks.types.type_for_mode
5710 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 5711 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 5712
d2a12ae7 5713 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 5714 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
5715 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5716 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 5717
2635892a 5718 return mask_vec;
a1e53f3f
L
5719}
5720
cf7aa6a3
AL
5721/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5722 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
5723
5724tree
5725vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5726{
5727 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5728 return vect_gen_perm_mask_any (vectype, sel);
5729}
5730
aec7ae7d
JJ
5731/* Given vector variables X and Y that were generated for the scalar
5732 STMT, generate instructions to permute the vector elements of X and Y
5733 using permutation mask MASK_VEC, insert them at *GSI and return the
5734 permuted vector variable. */
a1e53f3f
L
5735
5736static tree
aec7ae7d
JJ
5737permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5738 gimple_stmt_iterator *gsi)
a1e53f3f
L
5739{
5740 tree vectype = TREE_TYPE (x);
aec7ae7d 5741 tree perm_dest, data_ref;
a1e53f3f
L
5742 gimple perm_stmt;
5743
acdcd61b 5744 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 5745 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
5746
5747 /* Generate the permute statement. */
0d0e4a03 5748 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
5749 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5750
5751 return data_ref;
5752}
5753
6b916b36
RB
5754/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5755 inserting them on the loop's preheader edge. Returns true if we
5756 were successful in doing so (and thus STMT can then be moved),
5757 otherwise returns false. */
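/* E.g. if STMT loads from an address whose computation is
 loop-invariant but still emitted inside LOOP, that computation is
 moved to the preheader so that STMT itself becomes movable. */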
5758
5759static bool
5760hoist_defs_of_uses (gimple stmt, struct loop *loop)
5761{
5762 ssa_op_iter i;
5763 tree op;
5764 bool any = false;
5765
5766 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5767 {
5768 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5769 if (!gimple_nop_p (def_stmt)
5770 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5771 {
5772 /* Make sure we don't need to recurse. While we could do
5773 so in simple cases, for more complex use webs we don't
5774 have an easy way to preserve stmt order to fulfil
5775 dependencies within them. */
5776 tree op2;
5777 ssa_op_iter i2;
d1417442
JJ
5778 if (gimple_code (def_stmt) == GIMPLE_PHI)
5779 return false;
6b916b36
RB
5780 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5781 {
5782 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5783 if (!gimple_nop_p (def_stmt2)
5784 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5785 return false;
5786 }
5787 any = true;
5788 }
5789 }
5790
5791 if (!any)
5792 return true;
5793
5794 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5795 {
5796 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5797 if (!gimple_nop_p (def_stmt)
5798 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5799 {
5800 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5801 gsi_remove (&gsi, false);
5802 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5803 }
5804 }
5805
5806 return true;
5807}
5808
ebfd146a
IR
5809/* vectorizable_load.
5810
b8698a0f
L
5811 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5812 can be vectorized.
5813 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5814 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5815 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5816
5817static bool
5818vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 5819 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
5820{
5821 tree scalar_dest;
5822 tree vec_dest = NULL;
5823 tree data_ref = NULL;
5824 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 5825 stmt_vec_info prev_stmt_info;
ebfd146a 5826 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5827 struct loop *loop = NULL;
ebfd146a 5828 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 5829 bool nested_in_vect_loop = false;
c716e67f 5830 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 5831 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5832 tree elem_type;
ebfd146a 5833 tree new_temp;
ef4bddc2 5834 machine_mode mode;
ebfd146a
IR
5835 gimple new_stmt = NULL;
5836 tree dummy;
5837 enum dr_alignment_support alignment_support_scheme;
5838 tree dataref_ptr = NULL_TREE;
74bf76ed 5839 tree dataref_offset = NULL_TREE;
fef4d2b3 5840 gimple ptr_incr = NULL;
ebfd146a
IR
5841 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5842 int ncopies;
9b999e8c 5843 int i, j, group_size = -1, group_gap_adj;
ebfd146a
IR
5844 tree msq = NULL_TREE, lsq;
5845 tree offset = NULL_TREE;
356bbc4c 5846 tree byte_offset = NULL_TREE;
ebfd146a 5847 tree realignment_token = NULL_TREE;
538dd0b7 5848 gphi *phi = NULL;
6e1aa848 5849 vec<tree> dr_chain = vNULL;
0d0293ac 5850 bool grouped_load = false;
272c6793 5851 bool load_lanes_p = false;
ebfd146a 5852 gimple first_stmt;
ebfd146a 5853 bool inv_p;
319e6439 5854 bool negative = false;
ebfd146a
IR
5855 bool compute_in_loop = false;
5856 struct loop *at_loop;
5857 int vec_num;
5858 bool slp = (slp_node != NULL);
5859 bool slp_perm = false;
5860 enum tree_code code;
a70d6342
IR
5861 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5862 int vf;
272c6793 5863 tree aggr_type;
aec7ae7d
JJ
5864 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5865 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5866 int gather_scale = 1;
5867 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
5868
5869 if (loop_vinfo)
5870 {
5871 loop = LOOP_VINFO_LOOP (loop_vinfo);
5872 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5873 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5874 }
5875 else
3533e503 5876 vf = 1;
ebfd146a
IR
5877
5878 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5879 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 5880 case of SLP. */
437f4a00 5881 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5882 ncopies = 1;
5883 else
5884 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5885
5886 gcc_assert (ncopies >= 1);
5887
5888 /* FORNOW. This restriction should be relaxed. */
5889 if (nested_in_vect_loop && ncopies > 1)
5890 {
73fbfcad 5891 if (dump_enabled_p ())
78c60e3d 5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5893 "multiple types in nested loop.\n");
ebfd146a
IR
5894 return false;
5895 }
5896
f2556b68
RB
5897 /* Invalidate assumptions made by dependence analysis when vectorization
5898 on the unrolled body effectively re-orders stmts. */
5899 if (ncopies > 1
5900 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5901 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5902 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5903 {
5904 if (dump_enabled_p ())
5905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5906 "cannot perform implicit CSE when unrolling "
5907 "with negative dependence distance\n");
5908 return false;
5909 }
5910
a70d6342 5911 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5912 return false;
5913
8644a673 5914 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5915 return false;
5916
5917 /* Is vectorizable load? */
5918 if (!is_gimple_assign (stmt))
5919 return false;
5920
5921 scalar_dest = gimple_assign_lhs (stmt);
5922 if (TREE_CODE (scalar_dest) != SSA_NAME)
5923 return false;
5924
5925 code = gimple_assign_rhs_code (stmt);
5926 if (code != ARRAY_REF
38000232 5927 && code != BIT_FIELD_REF
ebfd146a 5928 && code != INDIRECT_REF
e9dbe7bb
IR
5929 && code != COMPONENT_REF
5930 && code != IMAGPART_EXPR
70f34814 5931 && code != REALPART_EXPR
42373e0b
RG
5932 && code != MEM_REF
5933 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
5934 return false;
5935
5936 if (!STMT_VINFO_DATA_REF (stmt_info))
5937 return false;
5938
7b7b1813 5939 elem_type = TREE_TYPE (vectype);
947131ba 5940 mode = TYPE_MODE (vectype);
ebfd146a
IR
5941
5942 /* FORNOW. In some cases can vectorize even if data-type not supported
5943 (e.g. - data copies). */
947131ba 5944 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 5945 {
73fbfcad 5946 if (dump_enabled_p ())
78c60e3d 5947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5948 "Aligned load, but unsupported type.\n");
ebfd146a
IR
5949 return false;
5950 }
5951
ebfd146a 5952 /* Check if the load is a part of an interleaving chain. */
0d0293ac 5953 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5954 {
0d0293ac 5955 grouped_load = true;
ebfd146a 5956 /* FORNOW */
aec7ae7d 5957 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 5958
e14c1050 5959 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d5f035ea
RB
5960
5961 /* If this is single-element interleaving with an element distance
5962 that leaves unused vector loads around, punt - we at least create
5963 very sub-optimal code in that case (and blow up memory,
5964 see PR65518). */
5965 if (first_stmt == stmt
5966 && !GROUP_NEXT_ELEMENT (stmt_info)
5967 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5968 {
5969 if (dump_enabled_p ())
5970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5971 "single-element interleaving not supported "
5972 "for not adjacent vector loads\n");
5973 return false;
5974 }
5975
b1af7da6
RB
5976 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5977 slp_perm = true;
5978
7b5fc413
RB
5979 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5980 if (!slp
5981 && !PURE_SLP_STMT (stmt_info)
f2e2a985 5982 && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 5983 {
272c6793
RS
5984 if (vect_load_lanes_supported (vectype, group_size))
5985 load_lanes_p = true;
0d0293ac 5986 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
5987 return false;
5988 }
f2556b68
RB
5989
5990 /* Invalidate assumptions made by dependence analysis when vectorization
5991 on the unrolled body effectively re-orders stmts. */
5992 if (!PURE_SLP_STMT (stmt_info)
5993 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5994 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5995 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5996 {
5997 if (dump_enabled_p ())
5998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5999 "cannot perform implicit CSE when performing "
6000 "group loads with negative dependence distance\n");
6001 return false;
6002 }
96bb56b2
RB
6003
6004 /* Similarly when the stmt is a load that is both part of a SLP
6005 instance and a loop vectorized stmt via the same-dr mechanism
6006 we have to give up. */
6007 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6008 && (STMT_SLP_TYPE (stmt_info)
6009 != STMT_SLP_TYPE (vinfo_for_stmt
6010 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6011 {
6012 if (dump_enabled_p ())
6013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6014 "conflicting SLP types for CSEd load\n");
6015 return false;
6016 }
ebfd146a
IR
6017 }
6018
a1e53f3f 6019
aec7ae7d
JJ
6020 if (STMT_VINFO_GATHER_P (stmt_info))
6021 {
6022 gimple def_stmt;
6023 tree def;
6024 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
6025 &gather_off, &gather_scale);
6026 gcc_assert (gather_decl);
24ee1384 6027 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
6028 &def_stmt, &def, &gather_dt,
6029 &gather_off_vectype))
6030 {
73fbfcad 6031 if (dump_enabled_p ())
78c60e3d 6032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6033 "gather index use not simple.\n");
aec7ae7d
JJ
6034 return false;
6035 }
6036 }
f2e2a985 6037 else if (STMT_VINFO_STRIDED_P (stmt_info))
7b5fc413
RB
6038 {
6039 if ((grouped_load
6040 && (slp || PURE_SLP_STMT (stmt_info)))
6041 && (group_size > nunits
b266b968 6042 || nunits % group_size != 0))
7b5fc413
RB
6043 {
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "unhandled strided group load\n");
6046 return false;
6047 }
6048 }
319e6439
RG
6049 else
6050 {
6051 negative = tree_int_cst_compare (nested_in_vect_loop
6052 ? STMT_VINFO_DR_STEP (stmt_info)
6053 : DR_STEP (dr),
6054 size_zero_node) < 0;
6055 if (negative && ncopies > 1)
6056 {
73fbfcad 6057 if (dump_enabled_p ())
78c60e3d 6058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6059 "multiple types with negative step.\n");
319e6439
RG
6060 return false;
6061 }
6062
6063 if (negative)
6064 {
08940f33
RB
6065 if (grouped_load)
6066 {
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
6069 "negative step for group load not supported"
6070 "\n");
08940f33
RB
6071 return false;
6072 }
319e6439
RG
6073 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6074 if (alignment_support_scheme != dr_aligned
6075 && alignment_support_scheme != dr_unaligned_supported)
6076 {
73fbfcad 6077 if (dump_enabled_p ())
78c60e3d 6078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6079 "negative step but alignment required.\n");
319e6439
RG
6080 return false;
6081 }
6082 if (!perm_mask_for_reverse (vectype))
6083 {
73fbfcad 6084 if (dump_enabled_p ())
78c60e3d 6085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
6086 "negative step and reversing not supported."
6087 "\n");
319e6439
RG
6088 return false;
6089 }
6090 }
7d75abc8 6091 }
aec7ae7d 6092
ebfd146a
IR
6093 if (!vec_stmt) /* transformation not required. */
6094 {
6095 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
6096 /* The SLP costs are calculated during SLP analysis. */
6097 if (!PURE_SLP_STMT (stmt_info))
6098 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6099 NULL, NULL, NULL);
ebfd146a
IR
6100 return true;
6101 }
6102
73fbfcad 6103 if (dump_enabled_p ())
78c60e3d 6104 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6105 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
6106
6107 /** Transform. **/
6108
c716e67f
XDL
6109 ensure_base_align (stmt_info, dr);
6110
aec7ae7d
JJ
6111 if (STMT_VINFO_GATHER_P (stmt_info))
6112 {
6113 tree vec_oprnd0 = NULL_TREE, op;
6114 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6115 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6116 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
6117 edge pe = loop_preheader_edge (loop);
6118 gimple_seq seq;
6119 basic_block new_bb;
6120 enum { NARROW, NONE, WIDEN } modifier;
6121 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6122
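 /* The offset vector need not have the same number of lanes as the
 data vector: with WIDEN one offset vector feeds two gathers (its
 halves used in turn), with NARROW two gather results are combined
 into a single data vector. */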
6123 if (nunits == gather_off_nunits)
6124 modifier = NONE;
6125 else if (nunits == gather_off_nunits / 2)
6126 {
6127 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6128 modifier = WIDEN;
6129
6130 for (i = 0; i < gather_off_nunits; ++i)
6131 sel[i] = i | nunits;
6132
557be5a8 6133 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
aec7ae7d
JJ
6134 }
6135 else if (nunits == gather_off_nunits * 2)
6136 {
6137 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6138 modifier = NARROW;
6139
6140 for (i = 0; i < nunits; ++i)
6141 sel[i] = i < gather_off_nunits
6142 ? i : i + nunits - gather_off_nunits;
6143
557be5a8 6144 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
aec7ae7d
JJ
6145 ncopies *= 2;
6146 }
6147 else
6148 gcc_unreachable ();
6149
6150 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6151 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6152 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6153 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6154 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6155 scaletype = TREE_VALUE (arglist);
d3c2fee0 6156 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
6157
6158 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6159
6160 ptr = fold_convert (ptrtype, gather_base);
6161 if (!is_gimple_min_invariant (ptr))
6162 {
6163 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6164 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6165 gcc_assert (!new_bb);
6166 }
6167
6168 /* Currently we support only unconditional gather loads,
6169 so mask should be all ones. */
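 /* An integer all-ones mask is built directly; a floating-point one
 is built by filling the target representation with all-one bits and
 reinterpreting it via real_from_target. */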
d3c2fee0
AI
6170 if (TREE_CODE (masktype) == INTEGER_TYPE)
6171 mask = build_int_cst (masktype, -1);
6172 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6173 {
6174 mask = build_int_cst (TREE_TYPE (masktype), -1);
6175 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6176 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6177 }
aec7ae7d
JJ
6178 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6179 {
6180 REAL_VALUE_TYPE r;
6181 long tmp[6];
6182 for (j = 0; j < 6; ++j)
6183 tmp[j] = -1;
6184 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6185 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6186 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6187 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
6188 }
6189 else
6190 gcc_unreachable ();
aec7ae7d
JJ
6191
6192 scale = build_int_cst (scaletype, gather_scale);
6193
d3c2fee0
AI
6194 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6195 merge = build_int_cst (TREE_TYPE (rettype), 0);
6196 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6197 {
6198 REAL_VALUE_TYPE r;
6199 long tmp[6];
6200 for (j = 0; j < 6; ++j)
6201 tmp[j] = 0;
6202 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6203 merge = build_real (TREE_TYPE (rettype), r);
6204 }
6205 else
6206 gcc_unreachable ();
6207 merge = build_vector_from_val (rettype, merge);
6208 merge = vect_init_vector (stmt, merge, rettype, NULL);
6209
aec7ae7d
JJ
6210 prev_stmt_info = NULL;
6211 for (j = 0; j < ncopies; ++j)
6212 {
6213 if (modifier == WIDEN && (j & 1))
6214 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6215 perm_mask, stmt, gsi);
6216 else if (j == 0)
6217 op = vec_oprnd0
6218 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6219 else
6220 op = vec_oprnd0
6221 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6222
6223 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6224 {
6225 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6226 == TYPE_VECTOR_SUBPARTS (idxtype));
6227 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
b731b390 6228 var = make_ssa_name (var);
aec7ae7d
JJ
6229 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6230 new_stmt
0d0e4a03 6231 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6233 op = var;
6234 }
6235
6236 new_stmt
d3c2fee0 6237 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
6238
6239 if (!useless_type_conversion_p (vectype, rettype))
6240 {
6241 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6242 == TYPE_VECTOR_SUBPARTS (rettype));
6243 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
6244 op = make_ssa_name (var, new_stmt);
6245 gimple_call_set_lhs (new_stmt, op);
6246 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 6247 var = make_ssa_name (vec_dest);
aec7ae7d
JJ
6248 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6249 new_stmt
0d0e4a03 6250 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6251 }
6252 else
6253 {
6254 var = make_ssa_name (vec_dest, new_stmt);
6255 gimple_call_set_lhs (new_stmt, var);
6256 }
6257
6258 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6259
6260 if (modifier == NARROW)
6261 {
6262 if ((j & 1) == 0)
6263 {
6264 prev_res = var;
6265 continue;
6266 }
6267 var = permute_vec_elements (prev_res, var,
6268 perm_mask, stmt, gsi);
6269 new_stmt = SSA_NAME_DEF_STMT (var);
6270 }
6271
6272 if (prev_stmt_info == NULL)
6273 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6274 else
6275 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6276 prev_stmt_info = vinfo_for_stmt (new_stmt);
6277 }
6278 return true;
6279 }
f2e2a985 6280 else if (STMT_VINFO_STRIDED_P (stmt_info))
7d75abc8
MM
6281 {
6282 gimple_stmt_iterator incr_gsi;
6283 bool insert_after;
6284 gimple incr;
6285 tree offvar;
7d75abc8
MM
6286 tree ivstep;
6287 tree running_off;
9771b263 6288 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 6289 gimple_seq stmts = NULL;
14ac6aa2
RB
6290 tree stride_base, stride_step, alias_off;
6291
6292 gcc_assert (!nested_in_vect_loop);
7d75abc8 6293
f502d50e 6294 if (slp && grouped_load)
ab313a8c
RB
6295 first_dr = STMT_VINFO_DATA_REF
6296 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6297 else
6298 first_dr = dr;
6299
14ac6aa2
RB
6300 stride_base
6301 = fold_build_pointer_plus
ab313a8c 6302 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 6303 size_binop (PLUS_EXPR,
ab313a8c
RB
6304 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6305 convert_to_ptrofftype (DR_INIT (first_dr))));
6306 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
6307
6308 /* For a load with loop-invariant (but other than power-of-2)
6309 stride (i.e. not a grouped access) like so:
6310
6311 for (i = 0; i < n; i += stride)
6312 ... = array[i];
6313
6314 we generate a new induction variable and new accesses to
6315 form a new vector (or vectors, depending on ncopies):
6316
6317 for (j = 0; ; j += VF*stride)
6318 tmp1 = array[j];
6319 tmp2 = array[j + stride];
6320 ...
6321 vectemp = {tmp1, tmp2, ...}
6322 */
6323
ab313a8c
RB
6324 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6325 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
6326
6327 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6328
ab313a8c 6329 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7d75abc8
MM
6330 loop, &incr_gsi, insert_after,
6331 &offvar, NULL);
6332 incr = gsi_stmt (incr_gsi);
6333 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6334
ab313a8c
RB
6335 stride_step = force_gimple_operand (unshare_expr (stride_step),
6336 &stmts, true, NULL_TREE);
7d75abc8
MM
6337 if (stmts)
6338 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6339
6340 prev_stmt_info = NULL;
6341 running_off = offvar;
ab313a8c 6342 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
7b5fc413
RB
6343 int nloads = nunits;
6344 tree ltype = TREE_TYPE (vectype);
b266b968 6345 auto_vec<tree> dr_chain;
7b5fc413
RB
6346 if (slp)
6347 {
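 /* For SLP each vector is assembled from NLOADS chunks of GROUP_SIZE
 elements; when the group is narrower than a full vector each chunk
 is loaded as a smaller vector type. */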
6348 nloads = nunits / group_size;
6349 if (group_size < nunits)
6350 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6351 else
6352 ltype = vectype;
6353 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6354 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
b266b968
RB
6355 if (slp_perm)
6356 dr_chain.create (ncopies);
7b5fc413 6357 }
7d75abc8
MM
6358 for (j = 0; j < ncopies; j++)
6359 {
6360 tree vec_inv;
6361
7b5fc413
RB
6362 if (nloads > 1)
6363 {
6364 vec_alloc (v, nloads);
6365 for (i = 0; i < nloads; i++)
6366 {
6367 tree newref, newoff;
6368 gimple incr;
6369 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6370
6371 newref = force_gimple_operand_gsi (gsi, newref, true,
6372 NULL_TREE, true,
6373 GSI_SAME_STMT);
6374 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6375 newoff = copy_ssa_name (running_off);
6376 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6377 running_off, stride_step);
6378 vect_finish_stmt_generation (stmt, incr, gsi);
6379
6380 running_off = newoff;
6381 }
6382
6383 vec_inv = build_constructor (vectype, v);
6384 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6385 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6386 }
6387 else
7d75abc8 6388 {
7b5fc413
RB
6389 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6390 build2 (MEM_REF, ltype,
6391 running_off, alias_off));
6392 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6393
6394 tree newoff = copy_ssa_name (running_off);
6395 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
0d0e4a03 6396 running_off, stride_step);
7d75abc8
MM
6397 vect_finish_stmt_generation (stmt, incr, gsi);
6398
6399 running_off = newoff;
6400 }
6401
7b5fc413 6402 if (slp)
b266b968
RB
6403 {
6404 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6405 if (slp_perm)
6406 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6407 }
7d75abc8
MM
6408 if (j == 0)
6409 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6410 else
6411 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6412 prev_stmt_info = vinfo_for_stmt (new_stmt);
6413 }
b266b968
RB
6414 if (slp_perm)
6415 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6416 slp_node_instance, false);
7d75abc8
MM
6417 return true;
6418 }
aec7ae7d 6419
0d0293ac 6420 if (grouped_load)
ebfd146a 6421 {
e14c1050 6422 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 6423 if (slp
01d8bf07 6424 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
6425 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6426 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 6427
ebfd146a 6428 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
6429 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6430 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6431 ??? But we can only do so if there is exactly one
6432 as we have no way to get at the rest. Leave the CSE
6433 opportunity alone.
6434 ??? With the group load eventually participating
6435 in multiple different permutations (having multiple
6436 slp nodes which refer to the same group) the CSE
6437 is even wrong code. See PR56270. */
6438 && !slp)
ebfd146a
IR
6439 {
6440 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6441 return true;
6442 }
6443 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6444 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
9b999e8c 6445 group_gap_adj = 0;
ebfd146a
IR
6446
6447 /* VEC_NUM is the number of vect stmts to be created for this group. */
6448 if (slp)
6449 {
0d0293ac 6450 grouped_load = false;
91ff1504
RB
6451 /* For SLP permutation support we need to load the whole group,
6452 not only the number of vector stmts the permutation result
6453 fits in. */
6454 if (slp_perm)
6455 vec_num = (group_size * vf + nunits - 1) / nunits;
6456 else
6457 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
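 /* Whatever of the VF * GROUP_SIZE scalar elements is not covered by
 the NUNITS * VEC_NUM vector elements actually loaded is recorded as
 a gap to be compensated for. */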
9b999e8c 6458 group_gap_adj = vf * group_size - nunits * vec_num;
a70d6342 6459 }
ebfd146a 6460 else
9b999e8c 6461 vec_num = group_size;
ebfd146a
IR
6462 }
6463 else
6464 {
6465 first_stmt = stmt;
6466 first_dr = dr;
6467 group_size = vec_num = 1;
9b999e8c 6468 group_gap_adj = 0;
ebfd146a
IR
6469 }
6470
720f5239 6471 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6472 gcc_assert (alignment_support_scheme);
272c6793
RS
6473 /* Targets with load-lane instructions must not require explicit
6474 realignment. */
6475 gcc_assert (!load_lanes_p
6476 || alignment_support_scheme == dr_aligned
6477 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
6478
6479 /* In case the vectorization factor (VF) is bigger than the number
6480 of elements that we can fit in a vectype (nunits), we have to generate
6481 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 6482 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 6483 from one copy of the vector stmt to the next, in the field
ff802fa1 6484 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 6485 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
6486 stmts that use the defs of the current stmt. The example below
6487 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6488 need to create 4 vectorized stmts):
ebfd146a
IR
6489
6490 before vectorization:
6491 RELATED_STMT VEC_STMT
6492 S1: x = memref - -
6493 S2: z = x + 1 - -
6494
6495 step 1: vectorize stmt S1:
6496 We first create the vector stmt VS1_0, and, as usual, record a
6497 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6498 Next, we create the vector stmt VS1_1, and record a pointer to
6499 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 6500 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
6501 stmts and pointers:
6502 RELATED_STMT VEC_STMT
6503 VS1_0: vx0 = memref0 VS1_1 -
6504 VS1_1: vx1 = memref1 VS1_2 -
6505 VS1_2: vx2 = memref2 VS1_3 -
6506 VS1_3: vx3 = memref3 - -
6507 S1: x = load - VS1_0
6508 S2: z = x + 1 - -
6509
b8698a0f
L
6510 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6511 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
6512 stmt S2. */
6513
0d0293ac 6514 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6515
6516 S1: x2 = &base + 2
6517 S2: x0 = &base
6518 S3: x1 = &base + 1
6519 S4: x3 = &base + 3
6520
b8698a0f 6521 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
6522 starting from the access of the first stmt of the chain:
6523
6524 VS1: vx0 = &base
6525 VS2: vx1 = &base + vec_size*1
6526 VS3: vx3 = &base + vec_size*2
6527 VS4: vx4 = &base + vec_size*3
6528
6529 Then permutation statements are generated:
6530
e2c83630
RH
6531 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6532 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
6533 ...
6534
6535 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6536 (the order of the data-refs in the output of vect_permute_load_chain
6537 corresponds to the order of scalar stmts in the interleaving chain - see
6538 the documentation of vect_permute_load_chain()).
6539 The generation of permutation stmts and recording them in
0d0293ac 6540 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 6541
b8698a0f 6542 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
6543 permutation stmts above are created for every copy. The result vector
6544 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6545 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
6546
6547 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6548 on a target that supports unaligned accesses (dr_unaligned_supported)
6549 we generate the following code:
6550 p = initial_addr;
6551 indx = 0;
6552 loop {
6553 p = p + indx * vectype_size;
6554 vec_dest = *(p);
6555 indx = indx + 1;
6556 }
6557
6558 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 6559 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
6560 then generate the following code, in which the data in each iteration is
6561 obtained by two vector loads, one from the previous iteration, and one
6562 from the current iteration:
6563 p1 = initial_addr;
6564 msq_init = *(floor(p1))
6565 p2 = initial_addr + VS - 1;
6566 realignment_token = call target_builtin;
6567 indx = 0;
6568 loop {
6569 p2 = p2 + indx * vectype_size
6570 lsq = *(floor(p2))
6571 vec_dest = realign_load (msq, lsq, realignment_token)
6572 indx = indx + 1;
6573 msq = lsq;
6574 } */
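
  /* As a concrete illustration of the realignment scheme, assume 16-byte
     V4SI vectors and initial_addr misaligned by 4 bytes.  Then
       floor(p1) = initial_addr - 4, so msq_init holds the aligned vector
                   covering the first three elements, and
       floor(p2) = initial_addr + 12, the next aligned vector.
     realign_load conceptually concatenates msq and lsq and extracts the
     16 bytes starting at byte offset 4 - exactly the four elements at
     initial_addr.  Each iteration reuses the previous lsq as the new msq,
     so only one new aligned load is needed per iteration.  */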

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
          byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
                                    size_one_node);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

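  /* E.g. an IFN_LOAD_LANES load of a group of three V4SI vectors uses an
     array aggregate of 3 * 4 = 12 elements here: the target's load-lanes
     instruction fills all vec_num vectors from one contiguous chunk.  */
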
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr)))
              && (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p,
                                          byte_offset);
        }
      else if (dataref_offset)
        dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                          TYPE_SIZE_UNIT (aggr_type));
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain.create (vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = fold_build2 (MEM_REF, vectype, dataref_ptr,
                                     dataref_offset
                                     ? dataref_offset
                                     : build_int_cst (reference_alias_ptr_type
                                                      (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        align = TYPE_ALIGN_UNIT (elem_type);
                        misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    if (dataref_offset == NULL_TREE
                        && TREE_CODE (dataref_ptr) == SSA_NAME)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;

                    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    if (TREE_CODE (dataref_ptr) == SSA_NAME)
                      ptr = copy_ssa_name (dataref_ptr);
                    else
                      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
                    new_stmt = gimple_build_assign
                                 (ptr, BIT_AND_EXPR, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs,
                                       TYPE_SIZE_UNIT (elem_type));
                    bump = size_binop (MINUS_EXPR, bump, size_one_node);
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign
                                 (NULL_TREE, BIT_AND_EXPR, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  if (TREE_CODE (dataref_ptr) == SSA_NAME)
                    new_temp = copy_ssa_name (dataref_ptr);
                  else
                    new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
                  new_stmt = gimple_build_assign
                               (new_temp, BIT_AND_EXPR, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
                                                  msq, lsq, realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gcc_assert (!grouped_load);
                  /* If we have versioned for aliasing or the loop doesn't
                     have any data dependencies that would preclude this,
                     then we are sure this is a loop invariant load and
                     thus we can insert it on the preheader edge.  */
                  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
                      && !nested_in_vect_loop
                      && hoist_defs_of_uses (stmt, loop))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "hoisting out of the vectorized "
                                           "loop: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }
                      tree tem = copy_ssa_name (scalar_dest);
                      gsi_insert_on_edge_immediate
                        (loop_preheader_edge (loop),
                         gimple_build_assign (tem,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
                    }
                  else
                    {
                      gimple_stmt_iterator gsi2 = *gsi;
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
                    }
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         bb_vinfo));
                }

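              /* For instance, a load of an invariant *p in the loop body
                 is emitted once as a scalar load on the preheader edge
                 (when hoist_defs_of_uses allows it) and then splatted
                 into a vector by vect_init_vector; otherwise the splat
                 is emitted in place, right after the scalar load.  */
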
              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap or for excess
             elements loaded for a permuted SLP load.  */
          if (group_gap_adj != 0)
            {
              bool ovf;
              tree bump
                = wide_int_to_tree (sizetype,
                                    wi::smul (TYPE_SIZE_UNIT (elem_type),
                                              group_gap_adj, &ovf));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
                     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
                                 &lhs_def_stmt, &def, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
                                 &rhs_def_stmt, &def, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

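/* For example, with V4SI operands the scalar statement

     x = a < b ? c : d;

   becomes a single vector statement

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where the embedded comparison is built in the signed integer vector
   type vec_cmp_type computed below.  */
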
bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  /* Is this a vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
                            &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be of signed integer type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

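  /* E.g. for vectype V4SF the element precision is 32, so cmp_type is a
     32-bit signed integer type and vec_cmp_type becomes V4SI - the type
     in which the vector comparison result is represented.  */
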
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;

              ops.safe_push (TREE_OPERAND (cond_expr, 0));
              ops.safe_push (TREE_OPERAND (cond_expr, 1));
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                                         vec_oprnds0.pop ());
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                                         vec_oprnds1.pop ());
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = vec_oprnds1[i];
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}


/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     The pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; don't analyze pattern stmts instead, as the pattern
     stmts will already be part of an SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
                  && (relevance == vect_used_in_outer
                      || relevance == vect_used_in_outer_by_reduction
                      || relevance == vect_used_by_reduction
                      || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type:  ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_reduction (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
              || vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  return true;
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
        is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
             vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          gimple patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
          if (TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                {
                  gimple seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
                  /* Release the LHS of the def-seq stmt itself, not of
                     PATT_STMT again (the original code looked at
                     PATT_STMT here, which was a bug).  */
                  lhs = gimple_get_lhs (seq_stmt);
                  if (TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
                }
            }
          free_stmt_vec_info (patt_stmt);
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

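/* For example, on a target whose preferred SIMD mode for SImode is V4SImode,
   calling this with SIZE == 0 yields V4SI (nunits = 16 / 4 = 4), while an
   explicit SIZE of 32 looks up a 32-byte vector mode instead and yields an
   eight-element vector if the target provides one.  */
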
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
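
/* E.g. in

     for (i = 0; i < n; i++)
       a[i] = b[i] * c;

   the operand c is classified as vect_external_def (loop invariant), a
   literal constant operand as vect_constant_def, and the SSA name holding
   b[i] as vect_internal_def - defined by the current loop iteration.  */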
ebfd146a
IR
7935
7936bool
24ee1384 7937vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 7938 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 7939 tree *def, enum vect_def_type *dt)
b8698a0f 7940{
ebfd146a
IR
7941 *def_stmt = NULL;
7942 *def = NULL_TREE;
3fc356dc 7943 *dt = vect_unknown_def_type;
b8698a0f 7944
73fbfcad 7945 if (dump_enabled_p ())
ebfd146a 7946 {
78c60e3d
SS
7947 dump_printf_loc (MSG_NOTE, vect_location,
7948 "vect_is_simple_use: operand ");
7949 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 7950 dump_printf (MSG_NOTE, "\n");
ebfd146a 7951 }
b8698a0f 7952
b758f602 7953 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
7954 {
7955 *dt = vect_constant_def;
7956 return true;
7957 }
b8698a0f 7958
ebfd146a
IR
7959 if (is_gimple_min_invariant (operand))
7960 {
7961 *def = operand;
8644a673 7962 *dt = vect_external_def;
ebfd146a
IR
7963 return true;
7964 }
7965
ebfd146a
IR
7966 if (TREE_CODE (operand) != SSA_NAME)
7967 {
73fbfcad 7968 if (dump_enabled_p ())
78c60e3d 7969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7970 "not ssa-name.\n");
ebfd146a
IR
7971 return false;
7972 }
b8698a0f 7973
3fc356dc 7974 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 7975 {
3fc356dc
RB
7976 *def = operand;
7977 *dt = vect_external_def;
7978 return true;
ebfd146a
IR
7979 }
7980
3fc356dc 7981 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 7982 if (dump_enabled_p ())
ebfd146a 7983 {
78c60e3d
SS
7984 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7985 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
7986 }
7987
3fc356dc
RB
7988 basic_block bb = gimple_bb (*def_stmt);
7989 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
7990 || (bb_vinfo
7991 && (bb != BB_VINFO_BB (bb_vinfo)
7992 || gimple_code (*def_stmt) == GIMPLE_PHI)))
8644a673 7993 *dt = vect_external_def;
ebfd146a
IR
7994 else
7995 {
3fc356dc
RB
7996 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
7997 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
90dd6e3d
RB
7998 *dt = vect_external_def;
7999 else
8000 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
8001 }
8002
2e8ab70c
RB
8003 if (dump_enabled_p ())
8004 {
8005 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8006 switch (*dt)
8007 {
8008 case vect_uninitialized_def:
8009 dump_printf (MSG_NOTE, "uninitialized\n");
8010 break;
8011 case vect_constant_def:
8012 dump_printf (MSG_NOTE, "constant\n");
8013 break;
8014 case vect_external_def:
8015 dump_printf (MSG_NOTE, "external\n");
8016 break;
8017 case vect_internal_def:
8018 dump_printf (MSG_NOTE, "internal\n");
8019 break;
8020 case vect_induction_def:
8021 dump_printf (MSG_NOTE, "induction\n");
8022 break;
8023 case vect_reduction_def:
8024 dump_printf (MSG_NOTE, "reduction\n");
8025 break;
8026 case vect_double_reduction_def:
8027 dump_printf (MSG_NOTE, "double reduction\n");
8028 break;
8029 case vect_nested_cycle:
8030 dump_printf (MSG_NOTE, "nested cycle\n");
8031 break;
8032 case vect_unknown_def_type:
8033 dump_printf (MSG_NOTE, "unknown\n");
8034 break;
8035 }
8036 }
8037
24ee1384
IR
8038 if (*dt == vect_unknown_def_type
8039 || (stmt
8040 && *dt == vect_double_reduction_def
8041 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 8042 {
73fbfcad 8043 if (dump_enabled_p ())
78c60e3d 8044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8045 "Unsupported pattern.\n");
ebfd146a
IR
8046 return false;
8047 }
8048
ebfd146a
IR
8049 switch (gimple_code (*def_stmt))
8050 {
8051 case GIMPLE_PHI:
8052 *def = gimple_phi_result (*def_stmt);
8053 break;
8054
8055 case GIMPLE_ASSIGN:
8056 *def = gimple_assign_lhs (*def_stmt);
8057 break;
8058
8059 case GIMPLE_CALL:
8060 *def = gimple_call_lhs (*def_stmt);
8061 if (*def != NULL)
8062 break;
8063 /* FALLTHRU */
8064 default:
73fbfcad 8065 if (dump_enabled_p ())
78c60e3d 8066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8067 "unsupported defining stmt:\n");
ebfd146a
IR
8068 return false;
8069 }
8070
8071 return true;
8072}
8073
b690cc0f
RG
8074/* Function vect_is_simple_use_1.
8075
8076 Same as vect_is_simple_use_1 but also determines the vector operand
8077 type of OPERAND and stores it to *VECTYPE. If the definition of
8078 OPERAND is vect_uninitialized_def, vect_constant_def or
8079 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8080 is responsible to compute the best suited vector type for the
8081 scalar operand. */
8082
8083bool
24ee1384 8084vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
8085 bb_vec_info bb_vinfo, gimple *def_stmt,
8086 tree *def, enum vect_def_type *dt, tree *vectype)
8087{
24ee1384
IR
8088 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8089 def, dt))
b690cc0f
RG
8090 return false;
8091
8092 /* Now get a vector type if the def is internal, otherwise supply
8093 NULL_TREE and leave it up to the caller to figure out a proper
8094 type for the use stmt. */
8095 if (*dt == vect_internal_def
8096 || *dt == vect_induction_def
8097 || *dt == vect_reduction_def
8098 || *dt == vect_double_reduction_def
8099 || *dt == vect_nested_cycle)
8100 {
8101 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
8102
8103 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8104 && !STMT_VINFO_RELEVANT (stmt_info)
8105 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 8106 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 8107
b690cc0f
RG
8108 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8109 gcc_assert (*vectype != NULL_TREE);
8110 }
8111 else if (*dt == vect_uninitialized_def
8112 || *dt == vect_constant_def
8113 || *dt == vect_external_def)
8114 *vectype = NULL_TREE;
8115 else
8116 gcc_unreachable ();
8117
8118 return true;
8119}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are
   supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps
     in case of multi-step conversion (like char->short->int - in that
     case MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type(s) required to perform
     the widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one
	 vector).  The generated vector results would normally be expected
	 to be generated in the same order as in the original scalar
	 computation, i.e. if 8 results are generated in each vector
	 iteration, they are to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order
	 doesn't matter (because when vectorizing a reduction we change
	 the order of the computation).  Some targets can take advantage
	 of this and generate more efficient code.  For example, targets
	 like Altivec, that support widen_mult using a sequence of
	 {mult_even,mult_odd} generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop
	 sequentially (each vectorized inner-loop iteration contributes
	 to VF outer-loop iterations in parallel).  We therefore don't
	 allow changing the order of the computation in the inner-loop
	 during outer-loop vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is
	 when we widen and then contract again, e.g. (short)((int)x * y
	 >> 8).  Normally, pack_trunc performs an even/odd permute,
	 whereas the repack from an even/odd expansion would be an
	 interleave, which would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below,
	 recurse on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the
	 return values are properly set up for the caller.  If we fail,
	 we'll continue with a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with the vect_used_by_reduction property
	     cannot be reordered if the use chain with this property does
	     not have the same operation.  One such example is s += a * b,
	     where elements in a and b cannot be reordered.  Here we check
	     whether the vector defined by STMT is only directly used in
	     the reduction statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

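  /* On big-endian targets the "low" and "high" halves of a vector are
     swapped relative to the element numbering assumed above, so exchange
     the LO/HI tree codes; the even/odd variants are endian-neutral.  */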
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */

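  /* Each iteration of the loop below introduces one intermediate vector
     type whose elements are twice as wide as the previous type's,
     verifies that both unpack halves are supported from PREV_MODE into
     that intermediate mode, and stops once the result mode reaches
     WIDE_VECTYPE.  */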
  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
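
/* A minimal illustrative sketch (not part of the vectorizer proper):
   checking whether a char->int widening conversion is supported.  The
   names example_check_widening, widen_stmt, qi_vectype and si_vectype
   are hypothetical placeholders for a conversion statement (assumed to
   already carry vectorizer stmt info) and its input/output vector
   types.  With one intermediate step (char->short->int),
   *MULTI_STEP_CVT comes back as 1 and INTERM_TYPES holds the short
   vector type.  */

static void
example_check_widening (gimple widen_stmt, tree qi_vectype, tree si_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_widening_operation (NOP_EXPR, widen_stmt,
				      si_vectype, qi_vectype,
				      &code1, &code2, &multi_step_cvt,
				      &interm_types))
    {
      /* CODE1/CODE2 are e.g. VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR;
	 each of the MULTI_STEP_CVT intermediate types doubles the
	 element width on the way to SI_VECTYPE.  */
      interm_types.release ();
    }
}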


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps
     in case of multi-step conversion (like int->short->char - in that
     case MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type(s) required to perform
     the narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      /* Query the handler of the signed variant (INTERM_OPTAB); querying
	 OPTAB1 again here would just recompute ICODE1 and make the mode
	 comparison below trivially true.  */
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (interm_optab, vec_mode))
	     != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
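
/* A minimal illustrative sketch (not part of the vectorizer proper),
   mirroring the widening example above for the narrowing direction.
   The names example_check_narrowing, si_vectype and qi_vectype are
   hypothetical int and char vector types; for an int->char conversion,
   one intermediate step (int->short->char) gives *MULTI_STEP_CVT == 1
   with the short vector type in INTERM_TYPES.  */

static void
example_check_narrowing (tree si_vectype, tree qi_vectype)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (NOP_EXPR, qi_vectype, si_vectype,
				       &code1, &multi_step_cvt,
				       &interm_types))
    {
      /* CODE1 is VEC_PACK_TRUNC_EXPR here; at each step two input
	 vectors are packed into one vector of half-width elements.  */
      interm_types.release ();
    }
}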