/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
ebfd146a 26#include "tm.h"
40e23961
MC
27#include "input.h"
28#include "alias.h"
29#include "symtab.h"
ebfd146a 30#include "tree.h"
40e23961 31#include "fold-const.h"
d8a2d370 32#include "stor-layout.h"
ebfd146a 33#include "target.h"
60393bbc 34#include "predict.h"
60393bbc 35#include "hard-reg-set.h"
60393bbc
AM
36#include "function.h"
37#include "dominance.h"
38#include "cfg.h"
ebfd146a 39#include "basic-block.h"
cf835838 40#include "gimple-pretty-print.h"
2fb9a547
AM
41#include "tree-ssa-alias.h"
42#include "internal-fn.h"
43#include "tree-eh.h"
44#include "gimple-expr.h"
45#include "is-a.h"
18f429e2 46#include "gimple.h"
45b0be94 47#include "gimplify.h"
5be5c238 48#include "gimple-iterator.h"
18f429e2 49#include "gimplify-me.h"
442b4905
AM
50#include "gimple-ssa.h"
51#include "tree-cfg.h"
52#include "tree-phinodes.h"
53#include "ssa-iterators.h"
d8a2d370 54#include "stringpool.h"
442b4905 55#include "tree-ssanames.h"
e28030cf 56#include "tree-ssa-loop-manip.h"
ebfd146a 57#include "cfgloop.h"
0136f8f0
AH
58#include "tree-ssa-loop.h"
59#include "tree-scalar-evolution.h"
36566b39
PK
60#include "rtl.h"
61#include "flags.h"
36566b39
PK
62#include "insn-config.h"
63#include "expmed.h"
64#include "dojump.h"
65#include "explow.h"
66#include "calls.h"
67#include "emit-rtl.h"
68#include "varasm.h"
69#include "stmt.h"
ebfd146a 70#include "expr.h"
7ee2468b 71#include "recog.h" /* FIXME: for insn_data */
b0710fe1 72#include "insn-codes.h"
ebfd146a 73#include "optabs.h"
718f9c0f 74#include "diagnostic-core.h"
ebfd146a 75#include "tree-vectorizer.h"
c582198b
AM
76#include "plugin-api.h"
77#include "ipa-ref.h"
0136f8f0 78#include "cgraph.h"
9b2b7279 79#include "builtins.h"
ebfd146a 80
7ee2468b
SB
81/* For lang_hooks.types.type_for_mode. */
82#include "langhooks.h"
ebfd146a 83
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

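/* Illustrative example (hypothetical loop nest): when the outer loop of

     for (i = 0; i < n; i++)        <-- loop being vectorized
       for (j = 0; j < m; j++)      <-- LOOP_VINFO_LOOP (...)->inner
         a[i][j] = 0;

   is being vectorized, stmts in the body of the j-loop satisfy this
   predicate.  */
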
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}

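/* Usage sketch (hypothetical caller, for illustration only): during
   analysis, inside-of-loop costs are typically accumulated into a cost
   vector that is replayed into the target model later:

     stmt_vector_for_cost body_cost_vec;
     body_cost_vec.create (2);
     unsigned inside_cost
       = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
			   stmt_info, 0, vect_body);

   Passing a NULL cost vector instead forwards the cost immediately to
   the target's add_stmt_cost hook.  */
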
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

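/* Illustrative use of the helpers above (hypothetical GIMPLE): for a
   two-vector load-lanes group, the generated code is conceptually

     vect_array = LOAD_LANES (*ptr);
     vx.0 = vect_array[0];	<-- read_vector_array
     vx.1 = vect_array[1];	<-- read_vector_array

   with create_array_ref providing the *ptr reference.  */
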
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is outside of any pattern; if LHS has other uses that
	     are pattern uses, we should mark the stmt itself, and not the
	     pattern stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}


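/* Illustrative example (hypothetical loop): in

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;	<-- has a vdef: relevant
	 sum = sum + b[i];	<-- sum used after the loop: live
       }
     ... = sum;

   the store is marked vect_used_in_scope, and the stmt computing sum
   gets *live_p set because its value escapes through the loop-exit
   phi.  */
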
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


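/* Illustrative example (hypothetical GIMPLE): for the store

     a[i_1] = x_2;

   the use i_1 only indexes the array, so the function returns false
   for it, while x_2 is the stored value itself and the function
   returns true.  */
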
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
	 it (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

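/* Worked example (illustrative): a two-step promotion (PWR = 1, e.g.
   char -> int) accumulates vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote operations, reflecting that each promotion step
   doubles the number of vectors produced.  */
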
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}

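/* Illustrative example: for an interleaved group of four stores
   a[4*i], a[4*i+1], a[4*i+2], a[4*i+3], the first stmt of the group
   returns 4 and the other three return 1, so the group overhead is
   attributed exactly once.  */
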
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
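      /* Worked example (illustrative): GROUP_SIZE = 4 and NCOPIES = 1
	 gives ceil_log2 (4) * 4 = 8 vec_perm operations.  */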
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
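      /* Worked example (illustrative): GROUP_SIZE = 2 and NCOPIES = 2
	 gives 2 * ceil_log2 (2) * 2 = 4 vec_perm operations.  */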
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}


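/* Usage sketch (illustrative): splatting the scalar constant of an
   'x + 3' stmt into a vector placed in the loop preheader:

     tree vec_cst
       = vect_init_vector (stmt, build_int_cst (TREE_TYPE (vectype), 3),
			   vectype, NULL);

   The NULL GSI requests placement at the preheader rather than at a
   specific insertion point.  */
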
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def = ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, "  def_stmt = ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


1524/* Function vect_get_vec_def_for_stmt_copy
1525
ff802fa1 1526 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1527 vectorized stmt to be created (by the caller to this function) is a "copy"
1528 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1529 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1530 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1531 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1532 DT is the type of the vector def VEC_OPRND.
1533
1534 Context:
1535 In case the vectorization factor (VF) is bigger than the number
1536 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1537 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1538 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1539 smallest data-type determines the VF, and as a result, when vectorizing
1540 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1541 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1542 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1543 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1544 which VF=16 and nunits=4, so the number of copies required is 4):
1545
1546 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1547
ebfd146a
IR
1548 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1549 VS1.1: vx.1 = memref1 VS1.2
1550 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1551 VS1.3: vx.3 = memref3
ebfd146a
IR
1552
1553 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1554 VSnew.1: vz1 = vx.1 + ... VSnew.2
1555 VSnew.2: vz2 = vx.2 + ... VSnew.3
1556 VSnew.3: vz3 = vx.3 + ...
1557
1558 The vectorization of S1 is explained in vectorizable_load.
1559 The vectorization of S2:
b8698a0f
L
1560 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1561 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1562 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1563 returns the vector-def 'vx.0'.
1564
b8698a0f
L
1565 To create the remaining copies of the vector-stmt (VSnew.j), this
1566 function is called to get the relevant vector-def for each operand. It is
1567 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1568 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1569
b8698a0f
L
1570 For example, to obtain the vector-def 'vx.1' in order to create the
1571 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1572 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1573 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1574 and return its def ('vx.1').
1575 Overall, to create the above sequence this function will be called 3 times:
1576 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1577 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1578 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1579
1580tree
1581vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1582{
1583 gimple vec_stmt_for_operand;
1584 stmt_vec_info def_stmt_info;
1585
1586 /* Do nothing; can reuse same def. */
1587 if (dt == vect_external_def || dt == vect_constant_def)
1588 return vec_oprnd;
1589
1590 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1591 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1592 gcc_assert (def_stmt_info);
1593 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (vec_stmt_for_operand);
1596 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1597 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1598 else
1599 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1600 return vec_oprnd;
1601}
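
/* Illustrative model (not GCC code): the copy chain walked above is in
   effect a singly linked list threaded through STMT_VINFO_RELATED_STMT.
   A minimal sketch, using hypothetical names, of the vx.0 -> vx.1 step
   from the comment above:

     struct vs { const char *def; struct vs *related; };

     struct vs vs1_3 = { "vx.3", 0 };
     struct vs vs1_2 = { "vx.2", &vs1_3 };
     struct vs vs1_1 = { "vx.1", &vs1_2 };
     struct vs vs1_0 = { "vx.0", &vs1_1 };

     const char *next_copy (struct vs *v) { return v->related->def; }

   next_copy (&vs1_0) yields "vx.1", just as this function maps vx.0 to
   vx.1 via the stmt that defines it.  */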
1602
1603
1604/* Get vectorized definitions for the operands to create a copy of an original
1605 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1606
1607static void
b8698a0f 1608vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1609 vec<tree> *vec_oprnds0,
1610 vec<tree> *vec_oprnds1)
ebfd146a 1611{
9771b263 1612 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1613
1614 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1615 vec_oprnds0->quick_push (vec_oprnd);
1616
1617 if (vec_oprnds1 && vec_oprnds1->length ())
1618 {
1619 vec_oprnd = vec_oprnds1->pop ();
1620 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1621 vec_oprnds1->quick_push (vec_oprnd);
1622 }
1623}
1624
1625
1626/* Get vectorized definitions for OP0 and OP1.
1627 REDUC_INDEX is the index of reduction operand in case of reduction,
1628 and -1 otherwise. */
1629
1630 void
1631 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1632 vec<tree> *vec_oprnds0,
1633 vec<tree> *vec_oprnds1,
1634 slp_tree slp_node, int reduc_index)
1635{
1636 if (slp_node)
1637 {
1638 int nops = (op1 == NULL_TREE) ? 1 : 2;
1639 auto_vec<tree> ops (nops);
1640 auto_vec<vec<tree> > vec_defs (nops);
1641
1642 ops.quick_push (op0);
1643 if (op1)
1644 ops.quick_push (op1);
1645
1646 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1647
1648 *vec_oprnds0 = vec_defs[0];
1649 if (op1)
1650 *vec_oprnds1 = vec_defs[1];
1651 }
1652 else
1653 {
1654 tree vec_oprnd;
1655
1656 vec_oprnds0->create (1);
1657 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1658 vec_oprnds0->quick_push (vec_oprnd);
1659
1660 if (op1)
1661 {
1662 vec_oprnds1->create (1);
1663 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1664 vec_oprnds1->quick_push (vec_oprnd);
1665 }
1666 }
1667}
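
/* Illustrative usage sketch (assumed caller pattern, mirroring other
   vectorizable_* routines in this file): the first copy of a stmt gets
   its operands from vect_get_vec_defs, later copies from
   vect_get_vec_defs_for_stmt_copy:

     vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
     ...
     if (j == 0)
       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                          slp_node, -1);
     else
       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

   where J counts the VF/nunits copies and DT holds the def types
   recorded during analysis.  */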
1668
1669
1670/* Function vect_finish_stmt_generation.
1671
1672 Insert a new stmt. */
1673
1674void
1675vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1676 gimple_stmt_iterator *gsi)
1677{
1678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1680 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1681
1682 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1683
1684 if (!gsi_end_p (*gsi)
1685 && gimple_has_mem_ops (vec_stmt))
1686 {
1687 gimple at_stmt = gsi_stmt (*gsi);
1688 tree vuse = gimple_vuse (at_stmt);
1689 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1690 {
1691 tree vdef = gimple_vdef (at_stmt);
1692 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1693 /* If we have an SSA vuse and insert a store, update virtual
1694 SSA form to avoid triggering the renamer. Do so only
1695 if we can easily see all uses - which is what almost always
1696 happens with the way vectorized stmts are inserted. */
1697 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1698 && ((is_gimple_assign (vec_stmt)
1699 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1700 || (is_gimple_call (vec_stmt)
1701 && !(gimple_call_flags (vec_stmt)
1702 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1703 {
1704 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1705 gimple_set_vdef (vec_stmt, new_vdef);
1706 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1707 }
1708 }
1709 }
ebfd146a
IR
1710 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1711
1712 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1713 bb_vinfo));
1714
1715 if (dump_enabled_p ())
1716 {
1717 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1718 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1719 }
1720
1721 gimple_set_location (vec_stmt, gimple_location (stmt));
1722
1723 /* While EH edges will generally prevent vectorization, stmt might
1724 e.g. be in a must-not-throw region. Ensure newly created stmts
1725 that could throw are part of the same region. */
1726 int lp_nr = lookup_stmt_eh_lp (stmt);
1727 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1728 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1729}
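
/* For illustration (assumed gimple, not produced verbatim): inserting a
   vector store VEC_STMT before a scalar store AT_STMT rewires the
   virtual SSA chain as described above roughly like so:

     before:  # .MEM_5 = VDEF <.MEM_4>
              a[i_2] = x_3;

     after:   # .MEM_7 = VDEF <.MEM_4>
              MEM[vectp_1] = vect_x_6;
              # .MEM_5 = VDEF <.MEM_7>
              a[i_2] = x_3;

   i.e. the new stmt takes over the old VUSE and defines a fresh virtual
   name that the scalar stmt then uses, so no renaming pass is needed.  */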
1730
1731/* Checks if CALL can be vectorized in type VECTYPE. Returns
1732 a function declaration if the target has a vectorized version
1733 of the function, or NULL_TREE if the function cannot be vectorized. */
1734
1735tree
538dd0b7 1736vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
ebfd146a
IR
1737{
1738 tree fndecl = gimple_call_fndecl (call);
1739
1740 /* We only handle functions that do not read or clobber memory -- i.e.
1741 const or novops ones. */
1742 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1743 return NULL_TREE;
1744
1745 if (!fndecl
1746 || TREE_CODE (fndecl) != FUNCTION_DECL
1747 || !DECL_BUILT_IN (fndecl))
1748 return NULL_TREE;
1749
1750 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1751 vectype_in);
1752}
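
/* Illustrative sketch (simplified, assumed shape of a backend hook; the
   decl name is hypothetical): a target typically implements the hook
   consulted above along these lines:

     static tree
     example_builtin_vectorized_function (tree fndecl, tree type_out,
                                          tree type_in)
     {
       if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SQRT
           && TYPE_MODE (type_out) == V2DFmode
           && TYPE_MODE (type_in) == V2DFmode)
         return example_v2df_sqrt_decl;
       return NULL_TREE;
     }

   Returning NULL_TREE means "no vector version", which makes the call
   above fail and the stmt stay scalar.  */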
1753
1754
1755static tree permute_vec_elements (tree, tree, tree, gimple,
1756 gimple_stmt_iterator *);
1757
1758
1759/* Function vectorizable_mask_load_store.
1760
1761 Check if STMT performs a conditional load or store that can be vectorized.
1762 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1763 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1764 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1765
1766static bool
1767vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1768 gimple *vec_stmt, slp_tree slp_node)
1769{
1770 tree vec_dest = NULL;
1771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1772 stmt_vec_info prev_stmt_info;
1773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1774 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1775 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1776 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1778 tree elem_type;
1779 gimple new_stmt;
1780 tree dummy;
1781 tree dataref_ptr = NULL_TREE;
1782 gimple ptr_incr;
1783 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1784 int ncopies;
1785 int i, j;
1786 bool inv_p;
1787 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1788 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1789 int gather_scale = 1;
1790 enum vect_def_type gather_dt = vect_unknown_def_type;
1791 bool is_store;
1792 tree mask;
1793 gimple def_stmt;
1794 tree def;
1795 enum vect_def_type dt;
1796
1797 if (slp_node != NULL)
1798 return false;
1799
1800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1801 gcc_assert (ncopies >= 1);
1802
1803 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1804 mask = gimple_call_arg (stmt, 2);
1805 if (TYPE_PRECISION (TREE_TYPE (mask))
1806 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1807 return false;
1808
1809 /* FORNOW. This restriction should be relaxed. */
1810 if (nested_in_vect_loop && ncopies > 1)
1811 {
1812 if (dump_enabled_p ())
1813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1814 "multiple types in nested loop.");
1815 return false;
1816 }
1817
1818 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1819 return false;
1820
1821 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1822 return false;
1823
1824 if (!STMT_VINFO_DATA_REF (stmt_info))
1825 return false;
1826
1827 elem_type = TREE_TYPE (vectype);
1828
1829 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1830 return false;
1831
1832 if (STMT_VINFO_STRIDED_P (stmt_info))
1833 return false;
1834
1835 if (STMT_VINFO_GATHER_P (stmt_info))
1836 {
1837 gimple def_stmt;
1838 tree def;
1839 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1840 &gather_off, &gather_scale);
1841 gcc_assert (gather_decl);
1842 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1843 &def_stmt, &def, &gather_dt,
1844 &gather_off_vectype))
1845 {
1846 if (dump_enabled_p ())
1847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1848 "gather index use not simple.");
1849 return false;
1850 }
1851
1852 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1853 tree masktype
1854 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1855 if (TREE_CODE (masktype) == INTEGER_TYPE)
1856 {
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1859 "masked gather with integer mask not supported.");
1860 return false;
1861 }
1862 }
1863 else if (tree_int_cst_compare (nested_in_vect_loop
1864 ? STMT_VINFO_DR_STEP (stmt_info)
1865 : DR_STEP (dr), size_zero_node) <= 0)
1866 return false;
1867 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1868 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1869 return false;
1870
1871 if (TREE_CODE (mask) != SSA_NAME)
1872 return false;
1873
1874 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1875 &def_stmt, &def, &dt))
1876 return false;
1877
1878 if (is_store)
1879 {
1880 tree rhs = gimple_call_arg (stmt, 3);
1881 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1882 &def_stmt, &def, &dt))
1883 return false;
1884 }
1885
1886 if (!vec_stmt) /* transformation not required. */
1887 {
1888 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1889 if (is_store)
1890 vect_model_store_cost (stmt_info, ncopies, false, dt,
1891 NULL, NULL, NULL);
1892 else
1893 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1894 return true;
1895 }
1896
1897 /** Transform. **/
1898
1899 if (STMT_VINFO_GATHER_P (stmt_info))
1900 {
1901 tree vec_oprnd0 = NULL_TREE, op;
1902 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1903 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1904 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1905 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1906 tree mask_perm_mask = NULL_TREE;
1907 edge pe = loop_preheader_edge (loop);
1908 gimple_seq seq;
1909 basic_block new_bb;
1910 enum { NARROW, NONE, WIDEN } modifier;
1911 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1912
1913 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1914 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1918 scaletype = TREE_VALUE (arglist);
1919 gcc_checking_assert (types_compatible_p (srctype, rettype)
1920 && types_compatible_p (srctype, masktype));
1921
1922 if (nunits == gather_off_nunits)
1923 modifier = NONE;
1924 else if (nunits == gather_off_nunits / 2)
1925 {
1926 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1927 modifier = WIDEN;
1928
1929 for (i = 0; i < gather_off_nunits; ++i)
1930 sel[i] = i | nunits;
1931
1932 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1933 }
1934 else if (nunits == gather_off_nunits * 2)
1935 {
1936 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1937 modifier = NARROW;
1938
1939 for (i = 0; i < nunits; ++i)
1940 sel[i] = i < gather_off_nunits
1941 ? i : i + nunits - gather_off_nunits;
1942
1943 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1944 ncopies *= 2;
1945 for (i = 0; i < nunits; ++i)
1946 sel[i] = i | gather_off_nunits;
1947 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1948 }
1949 else
1950 gcc_unreachable ();
1951
1952 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1953
1954 ptr = fold_convert (ptrtype, gather_base);
1955 if (!is_gimple_min_invariant (ptr))
1956 {
1957 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1958 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1959 gcc_assert (!new_bb);
1960 }
1961
1962 scale = build_int_cst (scaletype, gather_scale);
1963
1964 prev_stmt_info = NULL;
1965 for (j = 0; j < ncopies; ++j)
1966 {
1967 if (modifier == WIDEN && (j & 1))
1968 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1969 perm_mask, stmt, gsi);
1970 else if (j == 0)
1971 op = vec_oprnd0
1972 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1973 else
1974 op = vec_oprnd0
1975 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1976
1977 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1978 {
1979 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1980 == TYPE_VECTOR_SUBPARTS (idxtype));
1981 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1982 var = make_ssa_name (var);
1983 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1984 new_stmt
1985 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1987 op = var;
1988 }
1989
1990 if (mask_perm_mask && (j & 1))
1991 mask_op = permute_vec_elements (mask_op, mask_op,
1992 mask_perm_mask, stmt, gsi);
1993 else
1994 {
1995 if (j == 0)
1996 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1997 else
1998 {
1999 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2000 &def_stmt, &def, &dt);
2001 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2002 }
2003
2004 mask_op = vec_mask;
2005 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2006 {
2007 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2008 == TYPE_VECTOR_SUBPARTS (masktype));
2009 var = vect_get_new_vect_var (masktype, vect_simple_var,
2010 NULL);
2011 var = make_ssa_name (var);
2012 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2013 new_stmt
2014 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2016 mask_op = var;
2017 }
2018 }
2019
2020 new_stmt
2021 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2022 scale);
2023
2024 if (!useless_type_conversion_p (vectype, rettype))
2025 {
2026 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2027 == TYPE_VECTOR_SUBPARTS (rettype));
2028 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2029 op = make_ssa_name (var, new_stmt);
2030 gimple_call_set_lhs (new_stmt, op);
2031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2032 var = make_ssa_name (vec_dest);
2033 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2034 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2035 }
2036 else
2037 {
2038 var = make_ssa_name (vec_dest, new_stmt);
2039 gimple_call_set_lhs (new_stmt, var);
2040 }
2041
2042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2043
2044 if (modifier == NARROW)
2045 {
2046 if ((j & 1) == 0)
2047 {
2048 prev_res = var;
2049 continue;
2050 }
2051 var = permute_vec_elements (prev_res, var,
2052 perm_mask, stmt, gsi);
2053 new_stmt = SSA_NAME_DEF_STMT (var);
2054 }
2055
2056 if (prev_stmt_info == NULL)
2057 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2058 else
2059 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2060 prev_stmt_info = vinfo_for_stmt (new_stmt);
2061 }
2062
2063 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2064 from the IL. */
2065 tree lhs = gimple_call_lhs (stmt);
2066 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2067 set_vinfo_for_stmt (new_stmt, stmt_info);
2068 set_vinfo_for_stmt (stmt, NULL);
2069 STMT_VINFO_STMT (stmt_info) = new_stmt;
2070 gsi_replace (gsi, new_stmt, true);
2071 return true;
2072 }
2073 else if (is_store)
2074 {
2075 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2076 prev_stmt_info = NULL;
2077 for (i = 0; i < ncopies; i++)
2078 {
2079 unsigned align, misalign;
2080
2081 if (i == 0)
2082 {
2083 tree rhs = gimple_call_arg (stmt, 3);
2084 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2085 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2086 /* We should have caught mismatched types earlier. */
2087 gcc_assert (useless_type_conversion_p (vectype,
2088 TREE_TYPE (vec_rhs)));
2089 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2090 NULL_TREE, &dummy, gsi,
2091 &ptr_incr, false, &inv_p);
2092 gcc_assert (!inv_p);
2093 }
2094 else
2095 {
2096 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2097 &def, &dt);
2098 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2099 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2100 &def, &dt);
2101 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2102 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2103 TYPE_SIZE_UNIT (vectype));
2104 }
2105
2106 align = TYPE_ALIGN_UNIT (vectype);
2107 if (aligned_access_p (dr))
2108 misalign = 0;
2109 else if (DR_MISALIGNMENT (dr) == -1)
2110 {
2111 align = TYPE_ALIGN_UNIT (elem_type);
2112 misalign = 0;
2113 }
2114 else
2115 misalign = DR_MISALIGNMENT (dr);
2116 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2117 misalign);
2118 new_stmt
2119 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2120 gimple_call_arg (stmt, 1),
2121 vec_mask, vec_rhs);
2122 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2123 if (i == 0)
2124 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2125 else
2126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2127 prev_stmt_info = vinfo_for_stmt (new_stmt);
2128 }
2129 }
2130 else
2131 {
2132 tree vec_mask = NULL_TREE;
2133 prev_stmt_info = NULL;
2134 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2135 for (i = 0; i < ncopies; i++)
2136 {
2137 unsigned align, misalign;
2138
2139 if (i == 0)
2140 {
2141 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2142 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2143 NULL_TREE, &dummy, gsi,
2144 &ptr_incr, false, &inv_p);
2145 gcc_assert (!inv_p);
2146 }
2147 else
2148 {
2149 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2150 &def, &dt);
2151 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2152 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2153 TYPE_SIZE_UNIT (vectype));
2154 }
2155
2156 align = TYPE_ALIGN_UNIT (vectype);
2157 if (aligned_access_p (dr))
2158 misalign = 0;
2159 else if (DR_MISALIGNMENT (dr) == -1)
2160 {
2161 align = TYPE_ALIGN_UNIT (elem_type);
2162 misalign = 0;
2163 }
2164 else
2165 misalign = DR_MISALIGNMENT (dr);
2166 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2167 misalign);
2168 new_stmt
2169 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2170 gimple_call_arg (stmt, 1),
2171 vec_mask);
2172 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2174 if (i == 0)
2175 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2176 else
2177 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2178 prev_stmt_info = vinfo_for_stmt (new_stmt);
2179 }
2180 }
2181
2182 if (!is_store)
2183 {
2184 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2185 from the IL. */
2186 tree lhs = gimple_call_lhs (stmt);
2187 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2188 set_vinfo_for_stmt (new_stmt, stmt_info);
2189 set_vinfo_for_stmt (stmt, NULL);
2190 STMT_VINFO_STMT (stmt_info) = new_stmt;
2191 gsi_replace (gsi, new_stmt, true);
2192 }
2193
2194 return true;
2195}
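
/* Illustrative source-level example (not part of this file): a loop like

     void
     f (int *a, const int *b, const int *c, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         if (c[i] > 0)
           a[i] = b[i];
     }

   is if-converted into conditional IFN_MASK_LOAD/IFN_MASK_STORE calls,
   which the function above then turns into masked vector loads and
   stores, provided can_vec_mask_load_store_p holds for the vector mode
   (e.g. x86_64 at -O3 -mavx2).  */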
2196
2197
2198/* Function vectorizable_call.
2199
2200 Check if GS performs a function call that can be vectorized.
2201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2204
2205static bool
538dd0b7 2206vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
190c2236 2207 slp_tree slp_node)
ebfd146a 2208{
538dd0b7 2209 gcall *stmt;
ebfd146a
IR
2210 tree vec_dest;
2211 tree scalar_dest;
2212 tree op, type;
2213 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2214 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2215 tree vectype_out, vectype_in;
2216 int nunits_in;
2217 int nunits_out;
2218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2219 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2220 tree fndecl, new_temp, def, rhs_type;
2221 gimple def_stmt;
2222 enum vect_def_type dt[3]
2223 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2224 gimple new_stmt = NULL;
2225 int ncopies, j;
2226 vec<tree> vargs = vNULL;
2227 enum { NARROW, NONE, WIDEN } modifier;
2228 size_t i, nargs;
2229 tree lhs;
2230
2231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2232 return false;
2233
2234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2235 return false;
2236
2237 /* Is GS a vectorizable call? */
2238 stmt = dyn_cast <gcall *> (gs);
2239 if (!stmt)
2240 return false;
2241
2242 if (gimple_call_internal_p (stmt)
2243 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2244 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2245 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2246 slp_node);
2247
2248 if (gimple_call_lhs (stmt) == NULL_TREE
2249 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2250 return false;
2251
2252 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2253
2254 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2255
2256 /* Process function arguments. */
2257 rhs_type = NULL_TREE;
2258 vectype_in = NULL_TREE;
2259 nargs = gimple_call_num_args (stmt);
2260
2261 /* Bail out if the function has more than three arguments; we do not have
2262 interesting builtin functions to vectorize with more than two arguments
2263 except for fma. Zero arguments is not supported either. */
2264 if (nargs == 0 || nargs > 3)
2265 return false;
2266
2267 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2268 if (gimple_call_internal_p (stmt)
2269 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2270 {
2271 nargs = 0;
2272 rhs_type = unsigned_type_node;
2273 }
2274
2275 for (i = 0; i < nargs; i++)
2276 {
2277 tree opvectype;
2278
2279 op = gimple_call_arg (stmt, i);
2280
2281 /* We can only handle calls with arguments of the same type. */
2282 if (rhs_type
2283 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2284 {
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "argument types differ.\n");
2288 return false;
2289 }
2290 if (!rhs_type)
2291 rhs_type = TREE_TYPE (op);
2292
2293 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2294 &def_stmt, &def, &dt[i], &opvectype))
2295 {
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "use not simple.\n");
2299 return false;
2300 }
2301
2302 if (!vectype_in)
2303 vectype_in = opvectype;
2304 else if (opvectype
2305 && opvectype != vectype_in)
2306 {
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "argument vector types differ.\n");
2310 return false;
2311 }
2312 }
2313 /* If all arguments are external or constant defs use a vector type with
2314 the same size as the output vector type. */
2315 if (!vectype_in)
2316 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2317 if (vec_stmt)
2318 gcc_assert (vectype_in);
2319 if (!vectype_in)
2320 {
2321 if (dump_enabled_p ())
2322 {
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "no vectype for scalar type ");
2325 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2326 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2327 }
2328
2329 return false;
2330 }
2331
2332 /* FORNOW */
2333 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2334 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2335 if (nunits_in == nunits_out / 2)
2336 modifier = NARROW;
2337 else if (nunits_out == nunits_in)
2338 modifier = NONE;
2339 else if (nunits_out == nunits_in / 2)
2340 modifier = WIDEN;
2341 else
2342 return false;
2343
2344 /* For now, we only vectorize functions if a target specific builtin
2345 is available. TODO -- in some cases, it might be profitable to
2346 insert the calls for pieces of the vector, in order to be able
2347 to vectorize other operations in the loop. */
2348 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2349 if (fndecl == NULL_TREE)
2350 {
2351 if (gimple_call_internal_p (stmt)
2352 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2353 && !slp_node
2354 && loop_vinfo
2355 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2356 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2357 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2358 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2359 {
2360 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2361 { 0, 1, 2, ... vf - 1 } vector. */
2362 gcc_assert (nargs == 0);
2363 }
2364 else
2365 {
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 "function is not vectorizable.\n");
2369 return false;
2370 }
2371 }
2372
2373 gcc_assert (!gimple_vuse (stmt));
2374
2375 if (slp_node || PURE_SLP_STMT (stmt_info))
2376 ncopies = 1;
2377 else if (modifier == NARROW)
2378 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2379 else
2380 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2381
2382 /* Sanity check: make sure that at least one copy of the vectorized stmt
2383 needs to be generated. */
2384 gcc_assert (ncopies >= 1);
2385
2386 if (!vec_stmt) /* transformation not required. */
2387 {
2388 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2391 "\n");
2392 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2393 return true;
2394 }
2395
2396 /** Transform. **/
2397
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2400
2401 /* Handle def. */
2402 scalar_dest = gimple_call_lhs (stmt);
2403 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2404
2405 prev_stmt_info = NULL;
2406 switch (modifier)
2407 {
2408 case NONE:
2409 for (j = 0; j < ncopies; ++j)
2410 {
2411 /* Build argument list for the vectorized call. */
2412 if (j == 0)
2413 vargs.create (nargs);
2414 else
2415 vargs.truncate (0);
2416
2417 if (slp_node)
2418 {
2419 auto_vec<vec<tree> > vec_defs (nargs);
2420 vec<tree> vec_oprnds0;
2421
2422 for (i = 0; i < nargs; i++)
2423 vargs.quick_push (gimple_call_arg (stmt, i));
2424 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2425 vec_oprnds0 = vec_defs[0];
2426
2427 /* Arguments are ready. Create the new vector stmt. */
2428 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2429 {
2430 size_t k;
2431 for (k = 0; k < nargs; k++)
2432 {
2433 vec<tree> vec_oprndsk = vec_defs[k];
2434 vargs[k] = vec_oprndsk[i];
2435 }
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2440 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2441 }
2442
2443 for (i = 0; i < nargs; i++)
2444 {
2445 vec<tree> vec_oprndsi = vec_defs[i];
2446 vec_oprndsi.release ();
2447 }
2448 continue;
2449 }
2450
2451 for (i = 0; i < nargs; i++)
2452 {
2453 op = gimple_call_arg (stmt, i);
2454 if (j == 0)
2455 vec_oprnd0
2456 = vect_get_vec_def_for_operand (op, stmt, NULL);
2457 else
2458 {
2459 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2460 vec_oprnd0
2461 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2462 }
2463
2464 vargs.quick_push (vec_oprnd0);
2465 }
2466
2467 if (gimple_call_internal_p (stmt)
2468 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2469 {
2470 tree *v = XALLOCAVEC (tree, nunits_out);
2471 int k;
2472 for (k = 0; k < nunits_out; ++k)
2473 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2474 tree cst = build_vector (vectype_out, v);
2475 tree new_var
2476 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2477 gimple init_stmt = gimple_build_assign (new_var, cst);
2478 new_temp = make_ssa_name (new_var, init_stmt);
2479 gimple_assign_set_lhs (init_stmt, new_temp);
2480 vect_init_vector_1 (stmt, init_stmt, NULL);
2481 new_temp = make_ssa_name (vec_dest);
2482 new_stmt = gimple_build_assign (new_temp,
2483 gimple_assign_lhs (init_stmt));
2484 }
2485 else
2486 {
2487 new_stmt = gimple_build_call_vec (fndecl, vargs);
2488 new_temp = make_ssa_name (vec_dest, new_stmt);
2489 gimple_call_set_lhs (new_stmt, new_temp);
2490 }
2491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2492
2493 if (j == 0)
2494 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2495 else
2496 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2497
2498 prev_stmt_info = vinfo_for_stmt (new_stmt);
2499 }
2500
2501 break;
2502
2503 case NARROW:
2504 for (j = 0; j < ncopies; ++j)
2505 {
2506 /* Build argument list for the vectorized call. */
2507 if (j == 0)
2508 vargs.create (nargs * 2);
2509 else
2510 vargs.truncate (0);
2511
2512 if (slp_node)
2513 {
2514 auto_vec<vec<tree> > vec_defs (nargs);
2515 vec<tree> vec_oprnds0;
2516
2517 for (i = 0; i < nargs; i++)
2518 vargs.quick_push (gimple_call_arg (stmt, i));
2519 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2520 vec_oprnds0 = vec_defs[0];
2521
2522 /* Arguments are ready. Create the new vector stmt. */
2523 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2524 {
2525 size_t k;
2526 vargs.truncate (0);
2527 for (k = 0; k < nargs; k++)
2528 {
2529 vec<tree> vec_oprndsk = vec_defs[k];
2530 vargs.quick_push (vec_oprndsk[i]);
2531 vargs.quick_push (vec_oprndsk[i + 1]);
2532 }
2533 new_stmt = gimple_build_call_vec (fndecl, vargs);
2534 new_temp = make_ssa_name (vec_dest, new_stmt);
2535 gimple_call_set_lhs (new_stmt, new_temp);
2536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2537 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2538 }
2539
2540 for (i = 0; i < nargs; i++)
2541 {
2542 vec<tree> vec_oprndsi = vec_defs[i];
2543 vec_oprndsi.release ();
2544 }
2545 continue;
2546 }
2547
2548 for (i = 0; i < nargs; i++)
2549 {
2550 op = gimple_call_arg (stmt, i);
2551 if (j == 0)
2552 {
2553 vec_oprnd0
2554 = vect_get_vec_def_for_operand (op, stmt, NULL);
2555 vec_oprnd1
2556 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2557 }
2558 else
2559 {
2560 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2561 vec_oprnd0
2562 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2563 vec_oprnd1
2564 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2565 }
2566
2567 vargs.quick_push (vec_oprnd0);
2568 vargs.quick_push (vec_oprnd1);
2569 }
2570
2571 new_stmt = gimple_build_call_vec (fndecl, vargs);
2572 new_temp = make_ssa_name (vec_dest, new_stmt);
2573 gimple_call_set_lhs (new_stmt, new_temp);
2574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2575
2576 if (j == 0)
2577 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2578 else
2579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2580
2581 prev_stmt_info = vinfo_for_stmt (new_stmt);
2582 }
2583
2584 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2585
2586 break;
2587
2588 case WIDEN:
2589 /* No current target implements this case. */
2590 return false;
2591 }
2592
2593 vargs.release ();
2594
2595 /* The call in STMT might prevent it from being removed in dce.
2596 We however cannot remove it here, due to the way the ssa name
2597 it defines is mapped to the new definition. So just replace
2598 rhs of the statement with something harmless. */
2599
2600 if (slp_node)
2601 return true;
2602
2603 type = TREE_TYPE (scalar_dest);
2604 if (is_pattern_stmt_p (stmt_info))
2605 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2606 else
2607 lhs = gimple_call_lhs (stmt);
2608 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2609 set_vinfo_for_stmt (new_stmt, stmt_info);
2610 set_vinfo_for_stmt (stmt, NULL);
2611 STMT_VINFO_STMT (stmt_info) = new_stmt;
2612 gsi_replace (gsi, new_stmt, false);
2613
2614 return true;
2615}
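
/* Illustrative source-level example (not part of this file): given a
   target whose builtin_vectorized_function hook handles BUILT_IN_SQRT
   (e.g. x86_64 at -O3 -mavx -fno-math-errno), the calls in

     void
     g (double *x, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         x[i] = __builtin_sqrt (x[i]);
     }

   are replaced here by calls to the vector builtin returned by
   vectorizable_function, one call per copy when VF > nunits.  */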
2616
2617
2618struct simd_call_arg_info
2619{
2620 tree vectype;
2621 tree op;
2622 enum vect_def_type dt;
2623 HOST_WIDE_INT linear_step;
2624 unsigned int align;
2625};
2626
2627/* Function vectorizable_simd_clone_call.
2628
2629 Check if STMT performs a function call that can be vectorized
2630 by calling a simd clone of the function.
2631 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2632 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2633 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2634
2635static bool
2636vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2637 gimple *vec_stmt, slp_tree slp_node)
2638{
2639 tree vec_dest;
2640 tree scalar_dest;
2641 tree op, type;
2642 tree vec_oprnd0 = NULL_TREE;
2643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2644 tree vectype;
2645 unsigned int nunits;
2646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2647 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2648 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2649 tree fndecl, new_temp, def;
2650 gimple def_stmt;
2651 gimple new_stmt = NULL;
2652 int ncopies, j;
2653 vec<simd_call_arg_info> arginfo = vNULL;
2654 vec<tree> vargs = vNULL;
2655 size_t i, nargs;
2656 tree lhs, rtype, ratype;
2657 vec<constructor_elt, va_gc> *ret_ctor_elts;
2658
2659 /* Is STMT a vectorizable call? */
2660 if (!is_gimple_call (stmt))
2661 return false;
2662
2663 fndecl = gimple_call_fndecl (stmt);
2664 if (fndecl == NULL_TREE)
2665 return false;
2666
2667 struct cgraph_node *node = cgraph_node::get (fndecl);
2668 if (node == NULL || node->simd_clones == NULL)
2669 return false;
2670
2671 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2672 return false;
2673
2674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2675 return false;
2676
2677 if (gimple_call_lhs (stmt)
2678 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2679 return false;
2680
2681 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2682
2683 vectype = STMT_VINFO_VECTYPE (stmt_info);
2684
2685 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2686 return false;
2687
2688 /* FORNOW */
2689 if (slp_node || PURE_SLP_STMT (stmt_info))
2690 return false;
2691
2692 /* Process function arguments. */
2693 nargs = gimple_call_num_args (stmt);
2694
2695 /* Bail out if the function has zero arguments. */
2696 if (nargs == 0)
2697 return false;
2698
2699 arginfo.create (nargs);
2700
2701 for (i = 0; i < nargs; i++)
2702 {
2703 simd_call_arg_info thisarginfo;
2704 affine_iv iv;
2705
2706 thisarginfo.linear_step = 0;
2707 thisarginfo.align = 0;
2708 thisarginfo.op = NULL_TREE;
2709
2710 op = gimple_call_arg (stmt, i);
2711 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2712 &def_stmt, &def, &thisarginfo.dt,
2713 &thisarginfo.vectype)
2714 || thisarginfo.dt == vect_uninitialized_def)
2715 {
2716 if (dump_enabled_p ())
2717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2718 "use not simple.\n");
2719 arginfo.release ();
2720 return false;
2721 }
2722
2723 if (thisarginfo.dt == vect_constant_def
2724 || thisarginfo.dt == vect_external_def)
2725 gcc_assert (thisarginfo.vectype == NULL_TREE);
2726 else
2727 gcc_assert (thisarginfo.vectype != NULL_TREE);
2728
2729 /* For linear arguments, the analyze phase should have saved
2730 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2731 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2732 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2733 {
2734 gcc_assert (vec_stmt);
2735 thisarginfo.linear_step
2736 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2737 thisarginfo.op
2738 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2739 /* If loop has been peeled for alignment, we need to adjust it. */
2740 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2741 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2742 if (n1 != n2)
2743 {
2744 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2745 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2746 tree opt = TREE_TYPE (thisarginfo.op);
2747 bias = fold_convert (TREE_TYPE (step), bias);
2748 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2749 thisarginfo.op
2750 = fold_build2 (POINTER_TYPE_P (opt)
2751 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2752 thisarginfo.op, bias);
2753 }
2754 }
2755 else if (!vec_stmt
2756 && thisarginfo.dt != vect_constant_def
2757 && thisarginfo.dt != vect_external_def
2758 && loop_vinfo
2759 && TREE_CODE (op) == SSA_NAME
2760 && simple_iv (loop, loop_containing_stmt (stmt), op,
2761 &iv, false)
2762 && tree_fits_shwi_p (iv.step))
2763 {
2764 thisarginfo.linear_step = tree_to_shwi (iv.step);
2765 thisarginfo.op = iv.base;
2766 }
2767 else if ((thisarginfo.dt == vect_constant_def
2768 || thisarginfo.dt == vect_external_def)
2769 && POINTER_TYPE_P (TREE_TYPE (op)))
2770 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2771
2772 arginfo.quick_push (thisarginfo);
2773 }
2774
2775 unsigned int badness = 0;
2776 struct cgraph_node *bestn = NULL;
2777 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2778 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2779 else
2780 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2781 n = n->simdclone->next_clone)
2782 {
2783 unsigned int this_badness = 0;
2784 if (n->simdclone->simdlen
2785 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2786 || n->simdclone->nargs != nargs)
2787 continue;
2788 if (n->simdclone->simdlen
2789 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2791 - exact_log2 (n->simdclone->simdlen)) * 1024;
2792 if (n->simdclone->inbranch)
2793 this_badness += 2048;
2794 int target_badness = targetm.simd_clone.usable (n);
2795 if (target_badness < 0)
2796 continue;
2797 this_badness += target_badness * 512;
2798 /* FORNOW: Have to add code to add the mask argument. */
2799 if (n->simdclone->inbranch)
2800 continue;
2801 for (i = 0; i < nargs; i++)
2802 {
2803 switch (n->simdclone->args[i].arg_type)
2804 {
2805 case SIMD_CLONE_ARG_TYPE_VECTOR:
2806 if (!useless_type_conversion_p
2807 (n->simdclone->args[i].orig_type,
2808 TREE_TYPE (gimple_call_arg (stmt, i))))
2809 i = -1;
2810 else if (arginfo[i].dt == vect_constant_def
2811 || arginfo[i].dt == vect_external_def
2812 || arginfo[i].linear_step)
2813 this_badness += 64;
2814 break;
2815 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2816 if (arginfo[i].dt != vect_constant_def
2817 && arginfo[i].dt != vect_external_def)
2818 i = -1;
2819 break;
2820 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2821 if (arginfo[i].dt == vect_constant_def
2822 || arginfo[i].dt == vect_external_def
2823 || (arginfo[i].linear_step
2824 != n->simdclone->args[i].linear_step))
2825 i = -1;
2826 break;
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2828 /* FORNOW */
2829 i = -1;
2830 break;
2831 case SIMD_CLONE_ARG_TYPE_MASK:
2832 gcc_unreachable ();
2833 }
2834 if (i == (size_t) -1)
2835 break;
2836 if (n->simdclone->args[i].alignment > arginfo[i].align)
2837 {
2838 i = -1;
2839 break;
2840 }
2841 if (arginfo[i].align)
2842 this_badness += (exact_log2 (arginfo[i].align)
2843 - exact_log2 (n->simdclone->args[i].alignment));
2844 }
2845 if (i == (size_t) -1)
2846 continue;
2847 if (bestn == NULL || this_badness < badness)
2848 {
2849 bestn = n;
2850 badness = this_badness;
2851 }
2852 }
2853
2854 if (bestn == NULL)
2855 {
2856 arginfo.release ();
2857 return false;
2858 }
2859
2860 for (i = 0; i < nargs; i++)
2861 if ((arginfo[i].dt == vect_constant_def
2862 || arginfo[i].dt == vect_external_def)
2863 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2864 {
2865 arginfo[i].vectype
2866 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2867 i)));
2868 if (arginfo[i].vectype == NULL
2869 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2870 > bestn->simdclone->simdlen))
2871 {
2872 arginfo.release ();
2873 return false;
2874 }
2875 }
2876
2877 fndecl = bestn->decl;
2878 nunits = bestn->simdclone->simdlen;
2879 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2880
2881 /* If the function isn't const, only allow it in simd loops where the user
2882 has asserted that at least nunits consecutive iterations can be
2883 performed using SIMD instructions. */
2884 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2885 && gimple_vuse (stmt))
2886 {
2887 arginfo.release ();
2888 return false;
2889 }
2890
2891 /* Sanity check: make sure that at least one copy of the vectorized stmt
2892 needs to be generated. */
2893 gcc_assert (ncopies >= 1);
2894
2895 if (!vec_stmt) /* transformation not required. */
2896 {
2897 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2898 for (i = 0; i < nargs; i++)
2899 if (bestn->simdclone->args[i].arg_type
2900 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2901 {
2902 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2903 + 1);
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2905 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2906 ? size_type_node : TREE_TYPE (arginfo[i].op);
2907 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2908 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2909 }
2910 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2911 if (dump_enabled_p ())
2912 dump_printf_loc (MSG_NOTE, vect_location,
2913 "=== vectorizable_simd_clone_call ===\n");
2914/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2915 arginfo.release ();
2916 return true;
2917 }
2918
2919 /** Transform. **/
2920
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2923
2924 /* Handle def. */
2925 scalar_dest = gimple_call_lhs (stmt);
2926 vec_dest = NULL_TREE;
2927 rtype = NULL_TREE;
2928 ratype = NULL_TREE;
2929 if (scalar_dest)
2930 {
2931 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2932 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2933 if (TREE_CODE (rtype) == ARRAY_TYPE)
2934 {
2935 ratype = rtype;
2936 rtype = TREE_TYPE (ratype);
2937 }
2938 }
2939
2940 prev_stmt_info = NULL;
2941 for (j = 0; j < ncopies; ++j)
2942 {
2943 /* Build argument list for the vectorized call. */
2944 if (j == 0)
2945 vargs.create (nargs);
2946 else
2947 vargs.truncate (0);
2948
2949 for (i = 0; i < nargs; i++)
2950 {
2951 unsigned int k, l, m, o;
2952 tree atype;
2953 op = gimple_call_arg (stmt, i);
2954 switch (bestn->simdclone->args[i].arg_type)
2955 {
2956 case SIMD_CLONE_ARG_TYPE_VECTOR:
2957 atype = bestn->simdclone->args[i].vector_type;
2958 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2959 for (m = j * o; m < (j + 1) * o; m++)
2960 {
2961 if (TYPE_VECTOR_SUBPARTS (atype)
2962 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2963 {
2964 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2965 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2966 / TYPE_VECTOR_SUBPARTS (atype));
2967 gcc_assert ((k & (k - 1)) == 0);
2968 if (m == 0)
2969 vec_oprnd0
2970 = vect_get_vec_def_for_operand (op, stmt, NULL);
2971 else
2972 {
2973 vec_oprnd0 = arginfo[i].op;
2974 if ((m & (k - 1)) == 0)
2975 vec_oprnd0
2976 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2977 vec_oprnd0);
2978 }
2979 arginfo[i].op = vec_oprnd0;
2980 vec_oprnd0
2981 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2982 size_int (prec),
2983 bitsize_int ((m & (k - 1)) * prec));
2984 new_stmt
2985 = gimple_build_assign (make_ssa_name (atype),
2986 vec_oprnd0);
2987 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2988 vargs.safe_push (gimple_assign_lhs (new_stmt));
2989 }
2990 else
2991 {
2992 k = (TYPE_VECTOR_SUBPARTS (atype)
2993 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2994 gcc_assert ((k & (k - 1)) == 0);
2995 vec<constructor_elt, va_gc> *ctor_elts;
2996 if (k != 1)
2997 vec_alloc (ctor_elts, k);
2998 else
2999 ctor_elts = NULL;
3000 for (l = 0; l < k; l++)
3001 {
3002 if (m == 0 && l == 0)
3003 vec_oprnd0
3004 = vect_get_vec_def_for_operand (op, stmt, NULL);
3005 else
3006 vec_oprnd0
3007 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3008 arginfo[i].op);
3009 arginfo[i].op = vec_oprnd0;
3010 if (k == 1)
3011 break;
3012 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3013 vec_oprnd0);
3014 }
3015 if (k == 1)
3016 vargs.safe_push (vec_oprnd0);
3017 else
3018 {
3019 vec_oprnd0 = build_constructor (atype, ctor_elts);
3020 new_stmt
3021 = gimple_build_assign (make_ssa_name (atype),
3022 vec_oprnd0);
3023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3024 vargs.safe_push (gimple_assign_lhs (new_stmt));
3025 }
3026 }
3027 }
3028 break;
3029 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3030 vargs.safe_push (op);
3031 break;
3032 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3033 if (j == 0)
3034 {
3035 gimple_seq stmts;
3036 arginfo[i].op
3037 = force_gimple_operand (arginfo[i].op, &stmts, true,
3038 NULL_TREE);
3039 if (stmts != NULL)
3040 {
3041 basic_block new_bb;
3042 edge pe = loop_preheader_edge (loop);
3043 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3044 gcc_assert (!new_bb);
3045 }
3046 tree phi_res = copy_ssa_name (op);
3047 gphi *new_phi = create_phi_node (phi_res, loop->header);
3048 set_vinfo_for_stmt (new_phi,
3049 new_stmt_vec_info (new_phi, loop_vinfo,
3050 NULL));
3051 add_phi_arg (new_phi, arginfo[i].op,
3052 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3053 enum tree_code code
3054 = POINTER_TYPE_P (TREE_TYPE (op))
3055 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3056 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3057 ? sizetype : TREE_TYPE (op);
3058 widest_int cst
3059 = wi::mul (bestn->simdclone->args[i].linear_step,
3060 ncopies * nunits);
3061 tree tcst = wide_int_to_tree (type, cst);
3062 tree phi_arg = copy_ssa_name (op);
3063 new_stmt
3064 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3065 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3066 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3067 set_vinfo_for_stmt (new_stmt,
3068 new_stmt_vec_info (new_stmt, loop_vinfo,
3069 NULL));
3070 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3071 UNKNOWN_LOCATION);
3072 arginfo[i].op = phi_res;
3073 vargs.safe_push (phi_res);
3074 }
3075 else
3076 {
3077 enum tree_code code
3078 = POINTER_TYPE_P (TREE_TYPE (op))
3079 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3080 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3081 ? sizetype : TREE_TYPE (op);
3082 widest_int cst
3083 = wi::mul (bestn->simdclone->args[i].linear_step,
3084 j * nunits);
3085 tree tcst = wide_int_to_tree (type, cst);
3086 new_temp = make_ssa_name (TREE_TYPE (op));
3087 new_stmt = gimple_build_assign (new_temp, code,
3088 arginfo[i].op, tcst);
3089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3090 vargs.safe_push (new_temp);
3091 }
3092 break;
3093 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3094 default:
3095 gcc_unreachable ();
3096 }
3097 }
3098
3099 new_stmt = gimple_build_call_vec (fndecl, vargs);
3100 if (vec_dest)
3101 {
3102 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3103 if (ratype)
3104 new_temp = create_tmp_var (ratype);
3105 else if (TYPE_VECTOR_SUBPARTS (vectype)
3106 == TYPE_VECTOR_SUBPARTS (rtype))
3107 new_temp = make_ssa_name (vec_dest, new_stmt);
3108 else
3109 new_temp = make_ssa_name (rtype, new_stmt);
3110 gimple_call_set_lhs (new_stmt, new_temp);
3111 }
3112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3113
3114 if (vec_dest)
3115 {
3116 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3117 {
3118 unsigned int k, l;
3119 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3120 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3121 gcc_assert ((k & (k - 1)) == 0);
3122 for (l = 0; l < k; l++)
3123 {
3124 tree t;
3125 if (ratype)
3126 {
3127 t = build_fold_addr_expr (new_temp);
3128 t = build2 (MEM_REF, vectype, t,
3129 build_int_cst (TREE_TYPE (t),
3130 l * prec / BITS_PER_UNIT));
3131 }
3132 else
3133 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3134 size_int (prec), bitsize_int (l * prec));
3135 new_stmt
3136 = gimple_build_assign (make_ssa_name (vectype), t);
3137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3138 if (j == 0 && l == 0)
3139 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3140 else
3141 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3142
3143 prev_stmt_info = vinfo_for_stmt (new_stmt);
3144 }
3145
3146 if (ratype)
3147 {
3148 tree clobber = build_constructor (ratype, NULL);
3149 TREE_THIS_VOLATILE (clobber) = 1;
3150 new_stmt = gimple_build_assign (new_temp, clobber);
3151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3152 }
3153 continue;
3154 }
3155 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3156 {
3157 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3158 / TYPE_VECTOR_SUBPARTS (rtype));
3159 gcc_assert ((k & (k - 1)) == 0);
3160 if ((j & (k - 1)) == 0)
3161 vec_alloc (ret_ctor_elts, k);
3162 if (ratype)
3163 {
3164 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3165 for (m = 0; m < o; m++)
3166 {
3167 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3168 size_int (m), NULL_TREE, NULL_TREE);
3169 new_stmt
3170 = gimple_build_assign (make_ssa_name (rtype), tem);
3171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3172 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3173 gimple_assign_lhs (new_stmt));
3174 }
3175 tree clobber = build_constructor (ratype, NULL);
3176 TREE_THIS_VOLATILE (clobber) = 1;
3177 new_stmt = gimple_build_assign (new_temp, clobber);
3178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3179 }
3180 else
3181 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3182 if ((j & (k - 1)) != k - 1)
3183 continue;
3184 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3185 new_stmt
3186 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3188
3189 if ((unsigned) j == k - 1)
3190 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3191 else
3192 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3193
3194 prev_stmt_info = vinfo_for_stmt (new_stmt);
3195 continue;
3196 }
3197 else if (ratype)
3198 {
3199 tree t = build_fold_addr_expr (new_temp);
3200 t = build2 (MEM_REF, vectype, t,
3201 build_int_cst (TREE_TYPE (t), 0));
3202 new_stmt
3203 = gimple_build_assign (make_ssa_name (vec_dest), t);
3204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3205 tree clobber = build_constructor (ratype, NULL);
3206 TREE_THIS_VOLATILE (clobber) = 1;
3207 vect_finish_stmt_generation (stmt,
3208 gimple_build_assign (new_temp,
3209 clobber), gsi);
3210 }
3211 }
3212
3213 if (j == 0)
3214 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3215 else
3216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3217
3218 prev_stmt_info = vinfo_for_stmt (new_stmt);
3219 }
3220
3221 vargs.release ();
3222
3223 /* The call in STMT might prevent it from being removed in dce.
3224 We however cannot remove it here, due to the way the ssa name
3225 it defines is mapped to the new definition. So just replace
3226 rhs of the statement with something harmless. */
3227
3228 if (slp_node)
3229 return true;
3230
3231 if (scalar_dest)
3232 {
3233 type = TREE_TYPE (scalar_dest);
3234 if (is_pattern_stmt_p (stmt_info))
3235 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3236 else
3237 lhs = gimple_call_lhs (stmt);
3238 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3239 }
3240 else
3241 new_stmt = gimple_build_nop ();
3242 set_vinfo_for_stmt (new_stmt, stmt_info);
3243 set_vinfo_for_stmt (stmt, NULL);
3244 STMT_VINFO_STMT (stmt_info) = new_stmt;
3245 gsi_replace (gsi, new_stmt, true);
3246 unlink_stmt_vdef (stmt);
3247
3248 return true;
3249}
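
/* Illustrative source-level example (not part of this file): simd
   clones exist for functions declared e.g. as

     #pragma omp declare simd notinbranch
     int twice (int x) { return 2 * x; }

     void
     h (int *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = twice (a[i]);
     }

   (compiled with -fopenmp-simd).  node->simd_clones then lists the
   generated variants, and the scoring loop above picks the cheapest
   usable clone whose simdlen does not exceed the vectorization
   factor.  */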
3250
3251
3252/* Function vect_gen_widened_results_half
3253
3254 Create a vector stmt whose code, type, number of arguments, and result
3255 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3256 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3257 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3258 needs to be created (DECL is a function-decl of a target-builtin).
3259 STMT is the original scalar stmt that we are vectorizing. */
3260
3261static gimple
3262vect_gen_widened_results_half (enum tree_code code,
3263 tree decl,
3264 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3265 tree vec_dest, gimple_stmt_iterator *gsi,
3266 gimple stmt)
b8698a0f 3267{
ebfd146a 3268 gimple new_stmt;
b8698a0f
L
3269 tree new_temp;
3270
3271 /* Generate half of the widened result: */
3272 if (code == CALL_EXPR)
3273 {
3274 /* Target specific support */
3275 if (op_type == binary_op)
3276 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3277 else
3278 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3279 new_temp = make_ssa_name (vec_dest, new_stmt);
3280 gimple_call_set_lhs (new_stmt, new_temp);
3281 }
3282 else
3283 {
3284 /* Generic support */
3285 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3286 if (op_type != binary_op)
3287 vec_oprnd1 = NULL;
0d0e4a03 3288 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3289 new_temp = make_ssa_name (vec_dest, new_stmt);
3290 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3291 }
ebfd146a
IR
3292 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3293
ebfd146a
IR
3294 return new_stmt;
3295}
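
/* As an illustration (a sketch, not actual vectorizer output): for a
   widening operation on a V8HI operand whose result needs two V4SI
   vectors, the caller invokes this function twice, once per half.
   Assuming the target supports the lo/hi tree code pair returned by
   supportable_widening_operation, the two generated stmts would be

     vect_lo = VEC_UNPACK_LO_EXPR <vx>;
     vect_hi = VEC_UNPACK_HI_EXPR <vx>;

   each produced by one call to this function.  */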

/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand () (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy () using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy () for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get the first vector operand.  All the vector operands except the very
     first one (that is the scalar operand) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get the second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
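
/* Each invocation pushes a pair of defs and then recurses MULTI_STEP_CVT
   more times, so a call with MULTI_STEP_CVT == 1 collects four defs in
   total.  This matches the NARROW case below, which sizes VEC_OPRNDS0 as
   2 * vect_pow2 (multi_step_cvt) and passes vect_pow2 (multi_step_cvt) - 1
   here (an illustrative note derived from the callers in this file).  */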


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create the demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for the next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

	  if (!*prev_stmt_info)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	  *prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
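
/* For example (an illustrative sketch): demoting int to char with
   multi_step_cvt == 1 first packs four V4SI operands pairwise into two
   V8HI vectors, and the recursive call then packs those into the final
   V16QI vector, both levels using VEC_PACK_TRUNC_EXPR.  */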


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of the promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
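
/* Note the doubling effect: each pass over this function turns N input
   vectors into 2N result vectors, so a two-step promotion (e.g. char to
   int, an illustrative sketch) widens one V16QI operand to two V8HI
   vectors and those to four V4SI vectors on the next pass.  */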


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
				   &def_stmt, &def, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
				 &def, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e., we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
								   NULL);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,
						      vop0);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

	      if (!prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e., we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,
						    vop0);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
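
/* To make the three cases concrete (an illustrative sketch, assuming
   128-bit vectors): V4SF -> V4SI via FIX_TRUNC_EXPR keeps the lane count
   and is classified NONE; widening short -> int needs two V4SI results
   per V8HI input (WIDEN); narrowing int -> short packs two V4SI inputs
   into one V8HI result (NARROW).  */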


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
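
/* For instance (an illustrative sketch), a scalar no-op conversion such as

     int_x = (int) unsigned_y;

   becomes a plain vector copy, with the operand wrapped in a
   VIEW_CONVERT_EXPR to the destination vector type when the codes differ:

     vect_x = VIEW_CONVERT_EXPR<vector(4) int>(vect_y);  */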


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
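
/* A typical use (an illustrative note, not an exhaustive list): the
   pattern recognizers call vect_supportable_shift before rewriting an
   operation such as a division by a power of two into a shift, so that
   the rewrite only happens when either the scalar-shift or the
   vector-shift optab can handle the corresponding vector mode.  */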


/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
		    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			     &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP we need to check that the shift count is the same for all
	 statements; in loops, if it is a constant or invariant, it is
	 always a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (vf < vect_min_worthwhile_factor (code)
	      && !vec_stmt))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or
		 loop invariant op1 directly, without extending it to vector
		 mode first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
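
/* To illustrate the two forms (a sketch, assuming the target provides the
   corresponding optabs): for x << 3 the shift amount is invariant, so the
   vector/scalar form keeps the scalar 3 as operand 2 of every vector stmt;
   for x << y with y defined inside the loop, each lane shifts by its own
   amount and the vector/vector form is used:

     vect_r = vect_x << 3;         (vector/scalar, optab_scalar)
     vect_r = vect_x << vect_y;    (vector/vector, optab_vector)  */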


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary, binary or ternary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
	icode = LAST_INSN_CODE;
      else
	icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt)		/* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op || op_type == ternary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node, -1);
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  if (op_type == ternary_op)
	    {
	      vec_oprnds2.create (1);
	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
								    stmt,
								    NULL));
	    }
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
4941
c716e67f
XDL
4942/* A helper function to ensure data reference DR's base alignment
4943 for STMT_INFO. */
4944
4945static void
4946ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4947{
4948 if (!dr->aux)
4949 return;
4950
4951 if (((dataref_aux *)dr->aux)->base_misaligned)
4952 {
4953 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4954 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4955
428f0c67
JH
4956 if (decl_in_symtab_p (base_decl))
4957 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4958 else
4959 {
4960 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4961 DECL_USER_ALIGN (base_decl) = 1;
4962 }
c716e67f
XDL
4963 ((dataref_aux *)dr->aux)->base_misaligned = false;
4964 }
4965}
4966
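/* Illustrative note (assumed types, not part of the original source):
   if a V4SF vectype requires 16-byte alignment but the base
   'float a[N]' of DR was laid out with only 4-byte alignment, the code
   above raises the declaration's alignment - through the symtab node
   for symbols the symbol table knows about, or via
   DECL_ALIGN/DECL_USER_ALIGN otherwise - and clears base_misaligned so
   this is done at most once per data reference.  */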
ebfd146a 4967
09dfa495
BM
4968/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4969 reversal of the vector elements. If that is impossible to do,
4970 returns NULL. */
4971
4972static tree
4973perm_mask_for_reverse (tree vectype)
4974{
4975 int i, nunits;
4976 unsigned char *sel;
4977
4978 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4979 sel = XALLOCAVEC (unsigned char, nunits);
4980
4981 for (i = 0; i < nunits; ++i)
4982 sel[i] = nunits - 1 - i;
4983
557be5a8
AL
4984 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4985 return NULL_TREE;
4986 return vect_gen_perm_mask_checked (vectype, sel);
09dfa495
BM
4987}
4988
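/* Illustrative example (assumed 4-element vectype, not from the
   original source): the loop above fills SEL with {3, 2, 1, 0}, so the
   returned mask makes

     v_rev = VEC_PERM_EXPR <v, v, {3, 2, 1, 0}>;

   yield the elements of V in reverse order, provided can_vec_perm_p
   accepted the selector for the target's vector mode.  */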
ebfd146a
IR
4989/* Function vectorizable_store.
4990
b8698a0f
L
4991 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4992 can be vectorized.
4993 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4994 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4995 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4996
4997static bool
4998vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 4999 slp_tree slp_node)
ebfd146a
IR
5000{
5001 tree scalar_dest;
5002 tree data_ref;
5003 tree op;
5004 tree vec_oprnd = NULL_TREE;
5005 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5006 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5008 tree elem_type;
ebfd146a 5009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5010 struct loop *loop = NULL;
ef4bddc2 5011 machine_mode vec_mode;
ebfd146a
IR
5012 tree dummy;
5013 enum dr_alignment_support alignment_support_scheme;
5014 tree def;
5015 gimple def_stmt;
5016 enum vect_def_type dt;
5017 stmt_vec_info prev_stmt_info = NULL;
5018 tree dataref_ptr = NULL_TREE;
74bf76ed 5019 tree dataref_offset = NULL_TREE;
fef4d2b3 5020 gimple ptr_incr = NULL;
f2e2a985 5021 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
5022 int ncopies;
5023 int j;
5024 gimple next_stmt, first_stmt = NULL;
0d0293ac 5025 bool grouped_store = false;
272c6793 5026 bool store_lanes_p = false;
ebfd146a 5027 unsigned int group_size, i;
6e1aa848
DN
5028 vec<tree> dr_chain = vNULL;
5029 vec<tree> oprnds = vNULL;
5030 vec<tree> result_chain = vNULL;
ebfd146a 5031 bool inv_p;
09dfa495
BM
5032 bool negative = false;
5033 tree offset = NULL_TREE;
6e1aa848 5034 vec<tree> vec_oprnds = vNULL;
ebfd146a 5035 bool slp = (slp_node != NULL);
ebfd146a 5036 unsigned int vec_num;
a70d6342 5037 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
272c6793 5038 tree aggr_type;
a70d6342
IR
5039
5040 if (loop_vinfo)
5041 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
5042
5043 /* Multiple types in SLP are handled by creating the appropriate number of
5044 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5045 case of SLP. */
437f4a00 5046 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5047 ncopies = 1;
5048 else
5049 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5050
5051 gcc_assert (ncopies >= 1);
5052
5053 /* FORNOW. This restriction should be relaxed. */
a70d6342 5054 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a 5055 {
73fbfcad 5056 if (dump_enabled_p ())
78c60e3d 5057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5058 "multiple types in nested loop.\n");
ebfd146a
IR
5059 return false;
5060 }
5061
a70d6342 5062 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5063 return false;
5064
8644a673 5065 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5066 return false;
5067
5068 /* Is vectorizable store? */
5069
5070 if (!is_gimple_assign (stmt))
5071 return false;
5072
5073 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5074 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5075 && is_pattern_stmt_p (stmt_info))
5076 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5077 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5078 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5079 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5080 && TREE_CODE (scalar_dest) != COMPONENT_REF
5081 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5082 && TREE_CODE (scalar_dest) != REALPART_EXPR
5083 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5084 return false;
5085
5086 gcc_assert (gimple_assign_single_p (stmt));
5087 op = gimple_assign_rhs1 (stmt);
24ee1384
IR
5088 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5089 &def, &dt))
ebfd146a 5090 {
73fbfcad 5091 if (dump_enabled_p ())
78c60e3d 5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5093 "use not simple.\n");
ebfd146a
IR
5094 return false;
5095 }
5096
272c6793 5097 elem_type = TREE_TYPE (vectype);
ebfd146a 5098 vec_mode = TYPE_MODE (vectype);
7b7b1813 5099
ebfd146a
IR
5100 /* FORNOW. In some cases can vectorize even if data-type not supported
5101 (e.g. - array initialization with 0). */
947131ba 5102 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5103 return false;
5104
5105 if (!STMT_VINFO_DATA_REF (stmt_info))
5106 return false;
5107
f2e2a985 5108 if (!STMT_VINFO_STRIDED_P (stmt_info))
09dfa495 5109 {
f2e2a985
MM
5110 negative =
5111 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5112 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5113 size_zero_node) < 0;
5114 if (negative && ncopies > 1)
09dfa495
BM
5115 {
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f2e2a985 5118 "multiple types with negative step.\n");
09dfa495
BM
5119 return false;
5120 }
f2e2a985 5121 if (negative)
09dfa495 5122 {
f2e2a985
MM
5123 gcc_assert (!grouped_store);
5124 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5125 if (alignment_support_scheme != dr_aligned
5126 && alignment_support_scheme != dr_unaligned_supported)
5127 {
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5130 "negative step but alignment required.\n");
5131 return false;
5132 }
5133 if (dt != vect_constant_def
5134 && dt != vect_external_def
5135 && !perm_mask_for_reverse (vectype))
5136 {
5137 if (dump_enabled_p ())
5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5139 "negative step and reversing not supported.\n");
5140 return false;
5141 }
09dfa495
BM
5142 }
5143 }
5144
0d0293ac 5145 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5146 {
0d0293ac 5147 grouped_store = true;
e14c1050 5148 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
cee62fee
MM
5149 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5150 if (!slp
5151 && !PURE_SLP_STMT (stmt_info)
5152 && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 5153 {
272c6793
RS
5154 if (vect_store_lanes_supported (vectype, group_size))
5155 store_lanes_p = true;
0d0293ac 5156 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
5157 return false;
5158 }
b8698a0f 5159
cee62fee
MM
5160 if (STMT_VINFO_STRIDED_P (stmt_info)
5161 && (slp || PURE_SLP_STMT (stmt_info))
5162 && (group_size > nunits
5163 || nunits % group_size != 0))
5164 {
5165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5166 "unhandled strided group store\n");
5167 return false;
5168 }
5169
ebfd146a
IR
5170 if (first_stmt == stmt)
5171 {
5172 /* STMT is the leader of the group. Check the operands of all the
5173 stmts of the group. */
e14c1050 5174 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
5175 while (next_stmt)
5176 {
5177 gcc_assert (gimple_assign_single_p (next_stmt));
5178 op = gimple_assign_rhs1 (next_stmt);
24ee1384
IR
5179 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5180 &def_stmt, &def, &dt))
ebfd146a 5181 {
73fbfcad 5182 if (dump_enabled_p ())
78c60e3d 5183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5184 "use not simple.\n");
ebfd146a
IR
5185 return false;
5186 }
e14c1050 5187 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5188 }
5189 }
5190 }
5191
5192 if (!vec_stmt) /* transformation not required. */
5193 {
5194 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5195 /* The SLP costs are calculated during SLP analysis. */
5196 if (!PURE_SLP_STMT (stmt_info))
5197 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5198 NULL, NULL, NULL);
ebfd146a
IR
5199 return true;
5200 }
5201
5202 /** Transform. **/
5203
c716e67f
XDL
5204 ensure_base_align (stmt_info, dr);
5205
0d0293ac 5206 if (grouped_store)
ebfd146a
IR
5207 {
5208 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5209 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5210
e14c1050 5211 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5212
5213 /* FORNOW */
a70d6342 5214 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5215
5216 /* We vectorize all the stmts of the interleaving group when we
5217 reach the last stmt in the group. */
e14c1050
IR
5218 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5219 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5220 && !slp)
5221 {
5222 *vec_stmt = NULL;
5223 return true;
5224 }
5225
5226 if (slp)
4b5caab7 5227 {
0d0293ac 5228 grouped_store = false;
4b5caab7
IR
5229 /* VEC_NUM is the number of vect stmts to be created for this
5230 group. */
5231 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5232 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4b5caab7 5233 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5234 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5235 }
ebfd146a 5236 else
4b5caab7
IR
5237 /* VEC_NUM is the number of vect stmts to be created for this
5238 group. */
ebfd146a
IR
5239 vec_num = group_size;
5240 }
b8698a0f 5241 else
ebfd146a
IR
5242 {
5243 first_stmt = stmt;
5244 first_dr = dr;
5245 group_size = vec_num = 1;
ebfd146a 5246 }
b8698a0f 5247
73fbfcad 5248 if (dump_enabled_p ())
78c60e3d 5249 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5250 "transform store. ncopies = %d\n", ncopies);
ebfd146a 5251
f2e2a985
MM
5252 if (STMT_VINFO_STRIDED_P (stmt_info))
5253 {
5254 gimple_stmt_iterator incr_gsi;
5255 bool insert_after;
5256 gimple incr;
5257 tree offvar;
5258 tree ivstep;
5259 tree running_off;
5260 gimple_seq stmts = NULL;
5261 tree stride_base, stride_step, alias_off;
5262 tree vec_oprnd;
5263
5264 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5265
5266 stride_base
5267 = fold_build_pointer_plus
5268 (unshare_expr (DR_BASE_ADDRESS (dr)),
5269 size_binop (PLUS_EXPR,
5270 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5271 convert_to_ptrofftype (DR_INIT (dr))));
5272 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5273
5274 /* For a store with loop-invariant (but other than power-of-2)
5275 stride (i.e. not a grouped access) like so:
5276
5277 for (i = 0; i < n; i += stride)
5278 array[i] = ...;
5279
5280 we generate a new induction variable and new stores from
5281 the components of the (vectorized) rhs:
5282
5283 for (j = 0; ; j += VF*stride)
5284 vectemp = ...;
5285 tmp1 = vectemp[0];
5286 array[j] = tmp1;
5287 tmp2 = vectemp[1];
5288 array[j + stride] = tmp2;
5289 ...
5290 */
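/* A concrete sketch of the scheme above (assumed values, not from the
   original source): with nunits == 4, ncopies == 1 and scalar stride
   STRIDE, one copy of the vectorized store becomes

     vectemp = ...;
     array[j + 0*STRIDE] = vectemp[0];
     array[j + 1*STRIDE] = vectemp[1];
     array[j + 2*STRIDE] = vectemp[2];
     array[j + 3*STRIDE] = vectemp[3];

   which is why IVSTEP below is stride_step scaled by ncopies * nstores:
   the IV must advance past every element stored by all copies before
   the next vector iteration begins.  */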
5291
cee62fee
MM
5292 unsigned nstores = nunits;
5293 tree ltype = elem_type;
5294 if (slp)
5295 {
5296 nstores = nunits / group_size;
5297 if (group_size < nunits)
5298 ltype = build_vector_type (elem_type, group_size);
5299 else
5300 ltype = vectype;
5301 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5302 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5303 }
5304
f2e2a985
MM
5305 ivstep = stride_step;
5306 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5307 build_int_cst (TREE_TYPE (ivstep),
cee62fee 5308 ncopies * nstores));
f2e2a985
MM
5309
5310 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5311
5312 create_iv (stride_base, ivstep, NULL,
5313 loop, &incr_gsi, insert_after,
5314 &offvar, NULL);
5315 incr = gsi_stmt (incr_gsi);
5316 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5317
5318 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5319 if (stmts)
5320 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5321
5322 prev_stmt_info = NULL;
5323 running_off = offvar;
5324 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5325 for (j = 0; j < ncopies; j++)
5326 {
5327 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5328 and first_stmt == stmt. */
5329 if (j == 0)
5330 vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
5331 else
5332 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5333
cee62fee 5334 for (i = 0; i < nstores; i++)
f2e2a985
MM
5335 {
5336 tree newref, newoff;
5337 gimple incr, assign;
cee62fee 5338 tree size = TYPE_SIZE (ltype);
f2e2a985
MM
5339 /* Extract the i'th component. */
5340 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
5341 size);
cee62fee 5342 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
f2e2a985
MM
5343 size, pos);
5344
5345 elem = force_gimple_operand_gsi (gsi, elem, true,
5346 NULL_TREE, true,
5347 GSI_SAME_STMT);
5348
cee62fee 5349 newref = build2 (MEM_REF, ltype,
f2e2a985
MM
5350 running_off, alias_off);
5351
5352 /* And store it to *running_off. */
5353 assign = gimple_build_assign (newref, elem);
5354 vect_finish_stmt_generation (stmt, assign, gsi);
5355
5356 newoff = copy_ssa_name (running_off, NULL);
5357 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5358 running_off, stride_step);
5359 vect_finish_stmt_generation (stmt, incr, gsi);
5360
5361 running_off = newoff;
5362 if (j == 0 && i == 0)
5363 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5364 else
5365 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5366 prev_stmt_info = vinfo_for_stmt (assign);
5367 }
5368 }
5369 return true;
5370 }
5371
9771b263
DN
5372 dr_chain.create (group_size);
5373 oprnds.create (group_size);
ebfd146a 5374
720f5239 5375 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 5376 gcc_assert (alignment_support_scheme);
272c6793
RS
5377 /* Targets with store-lane instructions must not require explicit
5378 realignment. */
5379 gcc_assert (!store_lanes_p
5380 || alignment_support_scheme == dr_aligned
5381 || alignment_support_scheme == dr_unaligned_supported);
5382
09dfa495
BM
5383 if (negative)
5384 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5385
272c6793
RS
5386 if (store_lanes_p)
5387 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5388 else
5389 aggr_type = vectype;
ebfd146a
IR
5390
5391 /* In case the vectorization factor (VF) is bigger than the number
5392 of elements that we can fit in a vectype (nunits), we have to generate
5393 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 5394 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
5395 vect_get_vec_def_for_copy_stmt. */
5396
0d0293ac 5397 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
5398
5399 S1: &base + 2 = x2
5400 S2: &base = x0
5401 S3: &base + 1 = x1
5402 S4: &base + 3 = x3
5403
5404 We create vectorized stores starting from base address (the access of the
5405 first stmt in the chain (S2 in the above example), when the last store stmt
5406 of the chain (S4) is reached:
5407
5408 VS1: &base = vx2
5409 VS2: &base + vec_size*1 = vx0
5410 VS3: &base + vec_size*2 = vx1
5411 VS4: &base + vec_size*3 = vx3
5412
5413 Then permutation statements are generated:
5414
3fcc1b55
JJ
5415 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5416 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 5417 ...
b8698a0f 5418
ebfd146a
IR
5419 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5420 (the order of the data-refs in the output of vect_permute_store_chain
5421 corresponds to the order of scalar stmts in the interleaving chain - see
5422 the documentation of vect_permute_store_chain()).
5423
5424 In case of both multiple types and interleaving, above vector stores and
ff802fa1 5425 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 5426 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 5427 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
5428 */
5429
5430 prev_stmt_info = NULL;
5431 for (j = 0; j < ncopies; j++)
5432 {
5433 gimple new_stmt;
ebfd146a
IR
5434
5435 if (j == 0)
5436 {
5437 if (slp)
5438 {
5439 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
5440 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5441 NULL, slp_node, -1);
ebfd146a 5442
9771b263 5443 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
5444 }
5445 else
5446 {
b8698a0f
L
5447 /* For interleaved stores we collect vectorized defs for all the
5448 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5449 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
5450 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5451
0d0293ac 5452 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 5453 OPRNDS are of size 1. */
b8698a0f 5454 next_stmt = first_stmt;
ebfd146a
IR
5455 for (i = 0; i < group_size; i++)
5456 {
b8698a0f
L
5457 /* Since gaps are not supported for interleaved stores,
5458 GROUP_SIZE is the exact number of stmts in the chain.
5459 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5460 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
5461 iteration of the loop will be executed. */
5462 gcc_assert (next_stmt
5463 && gimple_assign_single_p (next_stmt));
5464 op = gimple_assign_rhs1 (next_stmt);
5465
b8698a0f 5466 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
ebfd146a 5467 NULL);
9771b263
DN
5468 dr_chain.quick_push (vec_oprnd);
5469 oprnds.quick_push (vec_oprnd);
e14c1050 5470 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5471 }
5472 }
5473
5474 /* We should have caught mismatched types earlier. */
5475 gcc_assert (useless_type_conversion_p (vectype,
5476 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
5477 bool simd_lane_access_p
5478 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5479 if (simd_lane_access_p
5480 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5481 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5482 && integer_zerop (DR_OFFSET (first_dr))
5483 && integer_zerop (DR_INIT (first_dr))
5484 && alias_sets_conflict_p (get_alias_set (aggr_type),
5485 get_alias_set (DR_REF (first_dr))))
5486 {
5487 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5488 dataref_offset = build_int_cst (reference_alias_ptr_type
5489 (DR_REF (first_dr)), 0);
8928eff3 5490 inv_p = false;
74bf76ed
JJ
5491 }
5492 else
5493 dataref_ptr
5494 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5495 simd_lane_access_p ? loop : NULL,
09dfa495 5496 offset, &dummy, gsi, &ptr_incr,
74bf76ed 5497 simd_lane_access_p, &inv_p);
a70d6342 5498 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 5499 }
b8698a0f 5500 else
ebfd146a 5501 {
b8698a0f
L
5502 /* For interleaved stores we created vectorized defs for all the
5503 defs stored in OPRNDS in the previous iteration (previous copy).
5504 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
5505 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5506 next copy.
0d0293ac 5507 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
5508 OPRNDS are of size 1. */
5509 for (i = 0; i < group_size; i++)
5510 {
9771b263 5511 op = oprnds[i];
24ee1384
IR
5512 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5513 &def, &dt);
b8698a0f 5514 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
5515 dr_chain[i] = vec_oprnd;
5516 oprnds[i] = vec_oprnd;
ebfd146a 5517 }
74bf76ed
JJ
5518 if (dataref_offset)
5519 dataref_offset
5520 = int_const_binop (PLUS_EXPR, dataref_offset,
5521 TYPE_SIZE_UNIT (aggr_type));
5522 else
5523 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5524 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
5525 }
5526
272c6793 5527 if (store_lanes_p)
ebfd146a 5528 {
272c6793 5529 tree vec_array;
267d3070 5530
272c6793
RS
5531 /* Combine all the vectors into an array. */
5532 vec_array = create_vector_array (vectype, vec_num);
5533 for (i = 0; i < vec_num; i++)
c2d7ab2a 5534 {
9771b263 5535 vec_oprnd = dr_chain[i];
272c6793 5536 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 5537 }
b8698a0f 5538
272c6793
RS
5539 /* Emit:
5540 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5541 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5542 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5543 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 5544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5545 }
5546 else
5547 {
5548 new_stmt = NULL;
0d0293ac 5549 if (grouped_store)
272c6793 5550 {
b6b9227d
JJ
5551 if (j == 0)
5552 result_chain.create (group_size);
272c6793
RS
5553 /* Permute. */
5554 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5555 &result_chain);
5556 }
c2d7ab2a 5557
272c6793
RS
5558 next_stmt = first_stmt;
5559 for (i = 0; i < vec_num; i++)
5560 {
644ffefd 5561 unsigned align, misalign;
272c6793
RS
5562
5563 if (i > 0)
5564 /* Bump the vector pointer. */
5565 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5566 stmt, NULL_TREE);
5567
5568 if (slp)
9771b263 5569 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
5570 else if (grouped_store)
5571 /* For grouped stores vectorized defs are interleaved in
272c6793 5572 vect_permute_store_chain(). */
9771b263 5573 vec_oprnd = result_chain[i];
272c6793
RS
5574
5575 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
74bf76ed
JJ
5576 dataref_offset
5577 ? dataref_offset
5578 : build_int_cst (reference_alias_ptr_type
5579 (DR_REF (first_dr)), 0));
644ffefd 5580 align = TYPE_ALIGN_UNIT (vectype);
272c6793 5581 if (aligned_access_p (first_dr))
644ffefd 5582 misalign = 0;
272c6793
RS
5583 else if (DR_MISALIGNMENT (first_dr) == -1)
5584 {
5585 TREE_TYPE (data_ref)
5586 = build_aligned_type (TREE_TYPE (data_ref),
5587 TYPE_ALIGN (elem_type));
644ffefd
MJ
5588 align = TYPE_ALIGN_UNIT (elem_type);
5589 misalign = 0;
272c6793
RS
5590 }
5591 else
5592 {
5593 TREE_TYPE (data_ref)
5594 = build_aligned_type (TREE_TYPE (data_ref),
5595 TYPE_ALIGN (elem_type));
644ffefd 5596 misalign = DR_MISALIGNMENT (first_dr);
272c6793 5597 }
74bf76ed
JJ
5598 if (dataref_offset == NULL_TREE)
5599 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5600 misalign);
c2d7ab2a 5601
f234d260
BM
5602 if (negative
5603 && dt != vect_constant_def
5604 && dt != vect_external_def)
09dfa495
BM
5605 {
5606 tree perm_mask = perm_mask_for_reverse (vectype);
5607 tree perm_dest
5608 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5609 vectype);
b731b390 5610 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
5611
5612 /* Generate the permute statement. */
5613 gimple perm_stmt
0d0e4a03
JJ
5614 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5615 vec_oprnd, perm_mask);
09dfa495
BM
5616 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5617
5618 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5619 vec_oprnd = new_temp;
5620 }
5621
272c6793
RS
5622 /* Arguments are ready. Create the new vector stmt. */
5623 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5624 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5625
5626 if (slp)
5627 continue;
5628
e14c1050 5629 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
5630 if (!next_stmt)
5631 break;
5632 }
ebfd146a 5633 }
1da0876c
RS
5634 if (!slp)
5635 {
5636 if (j == 0)
5637 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5638 else
5639 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5640 prev_stmt_info = vinfo_for_stmt (new_stmt);
5641 }
ebfd146a
IR
5642 }
5643
9771b263
DN
5644 dr_chain.release ();
5645 oprnds.release ();
5646 result_chain.release ();
5647 vec_oprnds.release ();
ebfd146a
IR
5648
5649 return true;
5650}
5651
557be5a8
AL
5652/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5653 VECTOR_CST mask. No checks are made that the target platform supports the
5654 mask, so callers may wish to test can_vec_perm_p separately, or use
5655 vect_gen_perm_mask_checked. */
a1e53f3f 5656
3fcc1b55 5657tree
557be5a8 5658vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
a1e53f3f 5659{
d2a12ae7 5660 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 5661 int i, nunits;
a1e53f3f 5662
22e4dee7 5663 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 5664
96f9265a
RG
5665 mask_elt_type = lang_hooks.types.type_for_mode
5666 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 5667 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 5668
d2a12ae7 5669 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 5670 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
5671 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5672 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 5673
2635892a 5674 return mask_vec;
a1e53f3f
L
5675}
5676
cf7aa6a3
AL
5677/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5678 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
5679
5680tree
5681vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5682{
5683 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5684 return vect_gen_perm_mask_any (vectype, sel);
5685}
5686
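/* Usage sketch (hypothetical selector, not from the original source):
   to interleave the low halves of two 4-element vectors one could
   build

     unsigned char sel[4] = { 0, 4, 1, 5 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   where indices 0..3 select from the first VEC_PERM_EXPR operand and
   indices 4..7 from the second.  vect_gen_perm_mask_any builds the
   same VECTOR_CST but leaves the can_vec_perm_p check to the
   caller.  */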
aec7ae7d
JJ
5687/* Given vector variables X and Y that were generated for the scalar
5688 STMT, generate instructions to permute the vector elements of X and Y
5689 using permutation mask MASK_VEC, insert them at *GSI and return the
5690 permuted vector variable. */
a1e53f3f
L
5691
5692static tree
aec7ae7d
JJ
5693permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5694 gimple_stmt_iterator *gsi)
a1e53f3f
L
5695{
5696 tree vectype = TREE_TYPE (x);
aec7ae7d 5697 tree perm_dest, data_ref;
a1e53f3f
L
5698 gimple perm_stmt;
5699
acdcd61b 5700 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 5701 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
5702
5703 /* Generate the permute statement. */
0d0e4a03 5704 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
5705 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5706
5707 return data_ref;
5708}
5709
6b916b36
RB
5710/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5711 inserting them on the loop's preheader edge. Returns true if we
5712 were successful in doing so (and thus STMT can be moved then),
5713 otherwise returns false. */
5714
5715static bool
5716hoist_defs_of_uses (gimple stmt, struct loop *loop)
5717{
5718 ssa_op_iter i;
5719 tree op;
5720 bool any = false;
5721
5722 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5723 {
5724 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5725 if (!gimple_nop_p (def_stmt)
5726 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5727 {
5728 /* Make sure we don't need to recurse. While we could do
5729 so in simple cases, when there are more complex use webs
5730 we don't have an easy way to preserve stmt order to fulfil
5731 dependencies within them. */
5732 tree op2;
5733 ssa_op_iter i2;
d1417442
JJ
5734 if (gimple_code (def_stmt) == GIMPLE_PHI)
5735 return false;
6b916b36
RB
5736 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5737 {
5738 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5739 if (!gimple_nop_p (def_stmt2)
5740 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5741 return false;
5742 }
5743 any = true;
5744 }
5745 }
5746
5747 if (!any)
5748 return true;
5749
5750 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5751 {
5752 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5753 if (!gimple_nop_p (def_stmt)
5754 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5755 {
5756 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5757 gsi_remove (&gsi, false);
5758 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5759 }
5760 }
5761
5762 return true;
5763}
5764
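/* Sketch of the intent (hypothetical gimple, not from the original
   source): if STMT is the invariant load  _3 = *_2  and its address
   _2 = &a[0] + 16  is computed inside LOOP, the definition of _2 is
   moved to the preheader so that STMT itself may be hoisted
   afterwards.  A use defined by a PHI, or one whose own operands are
   again defined inside LOOP, makes the function give up and return
   false.  */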
ebfd146a
IR
5765/* vectorizable_load.
5766
b8698a0f
L
5767 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5768 can be vectorized.
5769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5770 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5772
5773static bool
5774vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
c716e67f 5775 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
5776{
5777 tree scalar_dest;
5778 tree vec_dest = NULL;
5779 tree data_ref = NULL;
5780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 5781 stmt_vec_info prev_stmt_info;
ebfd146a 5782 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5783 struct loop *loop = NULL;
ebfd146a 5784 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 5785 bool nested_in_vect_loop = false;
c716e67f 5786 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
ebfd146a 5787 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
272c6793 5788 tree elem_type;
ebfd146a 5789 tree new_temp;
ef4bddc2 5790 machine_mode mode;
ebfd146a
IR
5791 gimple new_stmt = NULL;
5792 tree dummy;
5793 enum dr_alignment_support alignment_support_scheme;
5794 tree dataref_ptr = NULL_TREE;
74bf76ed 5795 tree dataref_offset = NULL_TREE;
fef4d2b3 5796 gimple ptr_incr = NULL;
ebfd146a
IR
5797 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5798 int ncopies;
9b999e8c 5799 int i, j, group_size = -1, group_gap_adj;
ebfd146a
IR
5800 tree msq = NULL_TREE, lsq;
5801 tree offset = NULL_TREE;
356bbc4c 5802 tree byte_offset = NULL_TREE;
ebfd146a 5803 tree realignment_token = NULL_TREE;
538dd0b7 5804 gphi *phi = NULL;
6e1aa848 5805 vec<tree> dr_chain = vNULL;
0d0293ac 5806 bool grouped_load = false;
272c6793 5807 bool load_lanes_p = false;
ebfd146a 5808 gimple first_stmt;
ebfd146a 5809 bool inv_p;
319e6439 5810 bool negative = false;
ebfd146a
IR
5811 bool compute_in_loop = false;
5812 struct loop *at_loop;
5813 int vec_num;
5814 bool slp = (slp_node != NULL);
5815 bool slp_perm = false;
5816 enum tree_code code;
a70d6342
IR
5817 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5818 int vf;
272c6793 5819 tree aggr_type;
aec7ae7d
JJ
5820 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5821 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5822 int gather_scale = 1;
5823 enum vect_def_type gather_dt = vect_unknown_def_type;
a70d6342
IR
5824
5825 if (loop_vinfo)
5826 {
5827 loop = LOOP_VINFO_LOOP (loop_vinfo);
5828 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5829 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5830 }
5831 else
3533e503 5832 vf = 1;
ebfd146a
IR
5833
5834 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5835 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 5836 case of SLP. */
437f4a00 5837 if (slp || PURE_SLP_STMT (stmt_info))
ebfd146a
IR
5838 ncopies = 1;
5839 else
5840 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5841
5842 gcc_assert (ncopies >= 1);
5843
5844 /* FORNOW. This restriction should be relaxed. */
5845 if (nested_in_vect_loop && ncopies > 1)
5846 {
73fbfcad 5847 if (dump_enabled_p ())
78c60e3d 5848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5849 "multiple types in nested loop.\n");
ebfd146a
IR
5850 return false;
5851 }
5852
f2556b68
RB
5853 /* Invalidate assumptions made by dependence analysis when vectorization
5854 on the unrolled body effectively re-orders stmts. */
5855 if (ncopies > 1
5856 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5857 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5858 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5859 {
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "cannot perform implicit CSE when unrolling "
5863 "with negative dependence distance\n");
5864 return false;
5865 }
5866
a70d6342 5867 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5868 return false;
5869
8644a673 5870 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
5871 return false;
5872
5873 /* Is vectorizable load? */
5874 if (!is_gimple_assign (stmt))
5875 return false;
5876
5877 scalar_dest = gimple_assign_lhs (stmt);
5878 if (TREE_CODE (scalar_dest) != SSA_NAME)
5879 return false;
5880
5881 code = gimple_assign_rhs_code (stmt);
5882 if (code != ARRAY_REF
38000232 5883 && code != BIT_FIELD_REF
ebfd146a 5884 && code != INDIRECT_REF
e9dbe7bb
IR
5885 && code != COMPONENT_REF
5886 && code != IMAGPART_EXPR
70f34814 5887 && code != REALPART_EXPR
42373e0b
RG
5888 && code != MEM_REF
5889 && TREE_CODE_CLASS (code) != tcc_declaration)
ebfd146a
IR
5890 return false;
5891
5892 if (!STMT_VINFO_DATA_REF (stmt_info))
5893 return false;
5894
7b7b1813 5895 elem_type = TREE_TYPE (vectype);
947131ba 5896 mode = TYPE_MODE (vectype);
ebfd146a
IR
5897
5898 /* FORNOW. In some cases can vectorize even if data-type not supported
5899 (e.g. - data copies). */
947131ba 5900 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 5901 {
73fbfcad 5902 if (dump_enabled_p ())
78c60e3d 5903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5904 "Aligned load, but unsupported type.\n");
ebfd146a
IR
5905 return false;
5906 }
5907
ebfd146a 5908 /* Check if the load is a part of an interleaving chain. */
0d0293ac 5909 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5910 {
0d0293ac 5911 grouped_load = true;
ebfd146a 5912 /* FORNOW */
aec7ae7d 5913 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
ebfd146a 5914
e14c1050 5915 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d5f035ea
RB
5916
5917 /* If this is single-element interleaving with an element distance
5918 that leaves unused vector loads around, punt - we at least create
5919 very sub-optimal code in that case (and blow up memory,
5920 see PR65518). */
5921 if (first_stmt == stmt
5922 && !GROUP_NEXT_ELEMENT (stmt_info)
5923 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5924 {
5925 if (dump_enabled_p ())
5926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5927 "single-element interleaving not supported "
5928 "for not adjacent vector loads\n");
5929 return false;
5930 }
5931
b1af7da6
RB
5932 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5933 slp_perm = true;
5934
7b5fc413
RB
5935 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5936 if (!slp
5937 && !PURE_SLP_STMT (stmt_info)
f2e2a985 5938 && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 5939 {
272c6793
RS
5940 if (vect_load_lanes_supported (vectype, group_size))
5941 load_lanes_p = true;
0d0293ac 5942 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
5943 return false;
5944 }
f2556b68
RB
5945
5946 /* Invalidate assumptions made by dependence analysis when vectorization
5947 on the unrolled body effectively re-orders stmts. */
5948 if (!PURE_SLP_STMT (stmt_info)
5949 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5950 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5951 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5952 {
5953 if (dump_enabled_p ())
5954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5955 "cannot perform implicit CSE when performing "
5956 "group loads with negative dependence distance\n");
5957 return false;
5958 }
96bb56b2
RB
5959
5960 /* Similarly when the stmt is a load that is both part of a SLP
5961 instance and a loop vectorized stmt via the same-dr mechanism
5962 we have to give up. */
5963 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5964 && (STMT_SLP_TYPE (stmt_info)
5965 != STMT_SLP_TYPE (vinfo_for_stmt
5966 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5967 {
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "conflicting SLP types for CSEd load\n");
5971 return false;
5972 }
ebfd146a
IR
5973 }
5974
a1e53f3f 5975
aec7ae7d
JJ
5976 if (STMT_VINFO_GATHER_P (stmt_info))
5977 {
5978 gimple def_stmt;
5979 tree def;
5980 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5981 &gather_off, &gather_scale);
5982 gcc_assert (gather_decl);
24ee1384 5983 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
aec7ae7d
JJ
5984 &def_stmt, &def, &gather_dt,
5985 &gather_off_vectype))
5986 {
73fbfcad 5987 if (dump_enabled_p ())
78c60e3d 5988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5989 "gather index use not simple.\n");
aec7ae7d
JJ
5990 return false;
5991 }
5992 }
f2e2a985 5993 else if (STMT_VINFO_STRIDED_P (stmt_info))
7b5fc413
RB
5994 {
5995 if ((grouped_load
5996 && (slp || PURE_SLP_STMT (stmt_info)))
5997 && (group_size > nunits
5998 || nunits % group_size != 0
b1af7da6 5999 /* We don't support load permutations. */
7b5fc413
RB
6000 || slp_perm))
6001 {
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6003 "unhandled strided group load\n");
6004 return false;
6005 }
6006 }
319e6439
RG
6007 else
6008 {
6009 negative = tree_int_cst_compare (nested_in_vect_loop
6010 ? STMT_VINFO_DR_STEP (stmt_info)
6011 : DR_STEP (dr),
6012 size_zero_node) < 0;
6013 if (negative && ncopies > 1)
6014 {
73fbfcad 6015 if (dump_enabled_p ())
78c60e3d 6016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6017 "multiple types with negative step.\n");
319e6439
RG
6018 return false;
6019 }
6020
6021 if (negative)
6022 {
08940f33
RB
6023 if (grouped_load)
6024 {
6025 if (dump_enabled_p ())
6026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
6027 "negative step for group load not supported"
6028 "\n");
08940f33
RB
6029 return false;
6030 }
319e6439
RG
6031 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6032 if (alignment_support_scheme != dr_aligned
6033 && alignment_support_scheme != dr_unaligned_supported)
6034 {
73fbfcad 6035 if (dump_enabled_p ())
78c60e3d 6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6037 "negative step but alignment required.\n");
319e6439
RG
6038 return false;
6039 }
6040 if (!perm_mask_for_reverse (vectype))
6041 {
73fbfcad 6042 if (dump_enabled_p ())
78c60e3d 6043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
6044 "negative step and reversing not supported."
6045 "\n");
319e6439
RG
6046 return false;
6047 }
6048 }
7d75abc8 6049 }
aec7ae7d 6050
ebfd146a
IR
6051 if (!vec_stmt) /* transformation not required. */
6052 {
6053 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
6054 /* The SLP costs are calculated during SLP analysis. */
6055 if (!PURE_SLP_STMT (stmt_info))
6056 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6057 NULL, NULL, NULL);
ebfd146a
IR
6058 return true;
6059 }
6060
73fbfcad 6061 if (dump_enabled_p ())
78c60e3d 6062 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6063 "transform load. ncopies = %d\n", ncopies);
ebfd146a
IR
6064
6065 /** Transform. **/
6066
c716e67f
XDL
6067 ensure_base_align (stmt_info, dr);
6068
aec7ae7d
JJ
6069 if (STMT_VINFO_GATHER_P (stmt_info))
6070 {
6071 tree vec_oprnd0 = NULL_TREE, op;
6072 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6073 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6074 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
6075 edge pe = loop_preheader_edge (loop);
6076 gimple_seq seq;
6077 basic_block new_bb;
6078 enum { NARROW, NONE, WIDEN } modifier;
6079 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6080
6081 if (nunits == gather_off_nunits)
6082 modifier = NONE;
6083 else if (nunits == gather_off_nunits / 2)
6084 {
6085 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6086 modifier = WIDEN;
6087
6088 for (i = 0; i < gather_off_nunits; ++i)
6089 sel[i] = i | nunits;
6090
557be5a8 6091 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
aec7ae7d
JJ
6092 }
6093 else if (nunits == gather_off_nunits * 2)
6094 {
6095 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6096 modifier = NARROW;
6097
6098 for (i = 0; i < nunits; ++i)
6099 sel[i] = i < gather_off_nunits
6100 ? i : i + nunits - gather_off_nunits;
6101
557be5a8 6102 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
aec7ae7d
JJ
6103 ncopies *= 2;
6104 }
6105 else
6106 gcc_unreachable ();
6107
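/* Sketch with assumed lane counts (not from the original source):
   for WIDEN, with nunits == 2 and gather_off_nunits == 4, SEL becomes
   {2, 3, 2, 3}, so odd copies permute offsets 2 and 3 into the lanes
   the gather builtin reads.  For NARROW, with nunits == 4 and
   gather_off_nunits == 2, SEL becomes {0, 1, 4, 5}, concatenating the
   live halves of two consecutive gather results into one full
   vector.  */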
6108 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6109 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6110 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6111 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6112 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6113 scaletype = TREE_VALUE (arglist);
d3c2fee0 6114 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
6115
6116 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6117
6118 ptr = fold_convert (ptrtype, gather_base);
6119 if (!is_gimple_min_invariant (ptr))
6120 {
6121 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6122 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6123 gcc_assert (!new_bb);
6124 }
6125
6126 /* Currently we support only unconditional gather loads,
6127 so mask should be all ones. */
d3c2fee0
AI
6128 if (TREE_CODE (masktype) == INTEGER_TYPE)
6129 mask = build_int_cst (masktype, -1);
6130 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6131 {
6132 mask = build_int_cst (TREE_TYPE (masktype), -1);
6133 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6134 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6135 }
aec7ae7d
JJ
6136 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6137 {
6138 REAL_VALUE_TYPE r;
6139 long tmp[6];
6140 for (j = 0; j < 6; ++j)
6141 tmp[j] = -1;
6142 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6143 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6144 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6145 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
6146 }
6147 else
6148 gcc_unreachable ();
aec7ae7d
JJ
6149
6150 scale = build_int_cst (scaletype, gather_scale);
6151
d3c2fee0
AI
6152 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6153 merge = build_int_cst (TREE_TYPE (rettype), 0);
6154 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6155 {
6156 REAL_VALUE_TYPE r;
6157 long tmp[6];
6158 for (j = 0; j < 6; ++j)
6159 tmp[j] = 0;
6160 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6161 merge = build_real (TREE_TYPE (rettype), r);
6162 }
6163 else
6164 gcc_unreachable ();
6165 merge = build_vector_from_val (rettype, merge);
6166 merge = vect_init_vector (stmt, merge, rettype, NULL);
6167
aec7ae7d
JJ
6168 prev_stmt_info = NULL;
6169 for (j = 0; j < ncopies; ++j)
6170 {
6171 if (modifier == WIDEN && (j & 1))
6172 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6173 perm_mask, stmt, gsi);
6174 else if (j == 0)
6175 op = vec_oprnd0
6176 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6177 else
6178 op = vec_oprnd0
6179 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6180
6181 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6182 {
6183 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6184 == TYPE_VECTOR_SUBPARTS (idxtype));
6185 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
b731b390 6186 var = make_ssa_name (var);
aec7ae7d
JJ
6187 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6188 new_stmt
0d0e4a03 6189 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6191 op = var;
6192 }
6193
6194 new_stmt
d3c2fee0 6195 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
6196
6197 if (!useless_type_conversion_p (vectype, rettype))
6198 {
6199 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6200 == TYPE_VECTOR_SUBPARTS (rettype));
6201 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
aec7ae7d
JJ
6202 op = make_ssa_name (var, new_stmt);
6203 gimple_call_set_lhs (new_stmt, op);
6204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 6205 var = make_ssa_name (vec_dest);
aec7ae7d
JJ
6206 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6207 new_stmt
0d0e4a03 6208 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6209 }
6210 else
6211 {
6212 var = make_ssa_name (vec_dest, new_stmt);
6213 gimple_call_set_lhs (new_stmt, var);
6214 }
6215
6216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6217
6218 if (modifier == NARROW)
6219 {
6220 if ((j & 1) == 0)
6221 {
6222 prev_res = var;
6223 continue;
6224 }
6225 var = permute_vec_elements (prev_res, var,
6226 perm_mask, stmt, gsi);
6227 new_stmt = SSA_NAME_DEF_STMT (var);
6228 }
6229
6230 if (prev_stmt_info == NULL)
6231 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6232 else
6233 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6234 prev_stmt_info = vinfo_for_stmt (new_stmt);
6235 }
6236 return true;
6237 }
f2e2a985 6238 else if (STMT_VINFO_STRIDED_P (stmt_info))
7d75abc8
MM
6239 {
6240 gimple_stmt_iterator incr_gsi;
6241 bool insert_after;
6242 gimple incr;
6243 tree offvar;
7d75abc8
MM
6244 tree ivstep;
6245 tree running_off;
9771b263 6246 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 6247 gimple_seq stmts = NULL;
14ac6aa2
RB
6248 tree stride_base, stride_step, alias_off;
6249
6250 gcc_assert (!nested_in_vect_loop);
7d75abc8 6251
14ac6aa2
RB
6252 stride_base
6253 = fold_build_pointer_plus
6254 (unshare_expr (DR_BASE_ADDRESS (dr)),
6255 size_binop (PLUS_EXPR,
6256 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
c3284718 6257 convert_to_ptrofftype (DR_INIT (dr))));
14ac6aa2 6258 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7d75abc8
MM
6259
6260 /* For a load with loop-invariant (but other than power-of-2)
6261 stride (i.e. not a grouped access) like so:
6262
6263 for (i = 0; i < n; i += stride)
6264 ... = array[i];
6265
6266 we generate a new induction variable and new accesses to
6267 form a new vector (or vectors, depending on ncopies):
6268
6269 for (j = 0; ; j += VF*stride)
6270 tmp1 = array[j];
6271 tmp2 = array[j + stride];
6272 ...
6273 vectemp = {tmp1, tmp2, ...}
6274 */
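/* A concrete sketch of the scheme above (assumed values, not from the
   original source): with nunits == 4 and VF == 4, each copy performs

     tmp0 = array[j + 0*STRIDE];
     tmp1 = array[j + 1*STRIDE];
     tmp2 = array[j + 2*STRIDE];
     tmp3 = array[j + 3*STRIDE];
     vectemp = {tmp0, tmp1, tmp2, tmp3};

   combining the scalar loads with a CONSTRUCTOR, and the IV below
   steps by stride_step * VF per vector iteration.  */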
6275
6276 ivstep = stride_step;
6277 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6278 build_int_cst (TREE_TYPE (ivstep), vf));
6279
6280 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6281
6282 create_iv (stride_base, ivstep, NULL,
6283 loop, &incr_gsi, insert_after,
6284 &offvar, NULL);
6285 incr = gsi_stmt (incr_gsi);
6286 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6287
6288 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6289 if (stmts)
6290 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6291
6292 prev_stmt_info = NULL;
6293 running_off = offvar;
14ac6aa2 6294 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
7b5fc413
RB
6295 int nloads = nunits;
6296 tree ltype = TREE_TYPE (vectype);
6297 if (slp)
6298 {
6299 nloads = nunits / group_size;
6300 if (group_size < nunits)
6301 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6302 else
6303 ltype = vectype;
6304 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6305 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6306 gcc_assert (!slp_perm);
6307 }
7d75abc8
MM
6308 for (j = 0; j < ncopies; j++)
6309 {
6310 tree vec_inv;
6311
7b5fc413
RB
6312 if (nloads > 1)
6313 {
6314 vec_alloc (v, nloads);
6315 for (i = 0; i < nloads; i++)
6316 {
6317 tree newref, newoff;
6318 gimple incr;
6319 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6320
6321 newref = force_gimple_operand_gsi (gsi, newref, true,
6322 NULL_TREE, true,
6323 GSI_SAME_STMT);
6324 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6325 newoff = copy_ssa_name (running_off);
6326 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6327 running_off, stride_step);
6328 vect_finish_stmt_generation (stmt, incr, gsi);
6329
6330 running_off = newoff;
6331 }
6332
6333 vec_inv = build_constructor (vectype, v);
6334 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6335 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6336 }
6337 else
7d75abc8 6338 {
7b5fc413
RB
6339 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6340 build2 (MEM_REF, ltype,
6341 running_off, alias_off));
6342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6343
6344 tree newoff = copy_ssa_name (running_off);
6345 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
0d0e4a03 6346 running_off, stride_step);
7d75abc8
MM
6347 vect_finish_stmt_generation (stmt, incr, gsi);
6348
6349 running_off = newoff;
6350 }
6351
7b5fc413
RB
6352 if (slp)
6353 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7d75abc8
MM
6354 if (j == 0)
6355 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6356 else
6357 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6358 prev_stmt_info = vinfo_for_stmt (new_stmt);
6359 }
6360 return true;
6361 }
aec7ae7d 6362
0d0293ac 6363 if (grouped_load)
ebfd146a 6364 {
e14c1050 6365 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6aa904c4 6366 if (slp
01d8bf07 6367 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
9771b263
DN
6368 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6369 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 6370
ebfd146a 6371 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
6372 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6373 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6374 ??? But we can only do so if there is exactly one
6375 as we have no way to get at the rest. Leave the CSE
6376 opportunity alone.
6377 ??? With the group load eventually participating
6378 in multiple different permutations (having multiple
6379 slp nodes which refer to the same group) the CSE
6380 is even wrong code. See PR56270. */
6381 && !slp)
ebfd146a
IR
6382 {
6383 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6384 return true;
6385 }
6386 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6387 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
9b999e8c 6388 group_gap_adj = 0;
ebfd146a
IR
6389
6390 /* VEC_NUM is the number of vect stmts to be created for this group. */
6391 if (slp)
6392 {
0d0293ac 6393 grouped_load = false;
ebfd146a 6394 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9b999e8c 6395 group_gap_adj = vf * group_size - nunits * vec_num;
a70d6342 6396 }
ebfd146a 6397 else
9b999e8c 6398 vec_num = group_size;
ebfd146a
IR
6399 }
6400 else
6401 {
6402 first_stmt = stmt;
6403 first_dr = dr;
6404 group_size = vec_num = 1;
9b999e8c 6405 group_gap_adj = 0;
ebfd146a
IR
6406 }
6407
720f5239 6408 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6409 gcc_assert (alignment_support_scheme);
272c6793
RS
6410 /* Targets with load-lane instructions must not require explicit
6411 realignment. */
6412 gcc_assert (!load_lanes_p
6413 || alignment_support_scheme == dr_aligned
6414 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
6415
6416 /* In case the vectorization factor (VF) is bigger than the number
6417 of elements that we can fit in a vectype (nunits), we have to generate
6418 more than one vector stmt - i.e. - we need to "unroll" the
ff802fa1 6419 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 6420 from one copy of the vector stmt to the next, in the field
ff802fa1 6421 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 6422 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
6423 stmts that use the defs of the current stmt. The example below
6424 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6425 need to create 4 vectorized stmts):
ebfd146a
IR
6426
6427 before vectorization:
6428 RELATED_STMT VEC_STMT
6429 S1: x = memref - -
6430 S2: z = x + 1 - -
6431
6432 step 1: vectorize stmt S1:
6433 We first create the vector stmt VS1_0, and, as usual, record a
6434 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6435 Next, we create the vector stmt VS1_1, and record a pointer to
6436 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 6437 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
6438 stmts and pointers:
6439 RELATED_STMT VEC_STMT
6440 VS1_0: vx0 = memref0 VS1_1 -
6441 VS1_1: vx1 = memref1 VS1_2 -
6442 VS1_2: vx2 = memref2 VS1_3 -
6443 VS1_3: vx3 = memref3 - -
6444 S1: x = load - VS1_0
6445 S2: z = x + 1 - -
6446
b8698a0f
L
6447 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6448 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
6449 stmt S2. */
6450
0d0293ac 6451 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6452
6453 S1: x2 = &base + 2
6454 S2: x0 = &base
6455 S3: x1 = &base + 1
6456 S4: x3 = &base + 3
6457
b8698a0f 6458 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
6459 starting from the access of the first stmt of the chain:
6460
6461 VS1: vx0 = &base
6462 VS2: vx1 = &base + vec_size*1
6463 VS3: vx3 = &base + vec_size*2
6464 VS4: vx4 = &base + vec_size*3
6465
6466 Then permutation statements are generated:
6467
e2c83630
RH
6468 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6469 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
6470 ...
6471
6472 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6473 (the order of the data-refs in the output of vect_permute_load_chain
6474 corresponds to the order of scalar stmts in the interleaving chain - see
6475 the documentation of vect_permute_load_chain()).
6476 The generation of permutation stmts and recording them in
0d0293ac 6477 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 6478
b8698a0f 6479 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
6480 permutation stmts above are created for every copy. The result vector
6481 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6482 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
6483
6484 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6485 on a target that supports unaligned accesses (dr_unaligned_supported)
6486 we generate the following code:
6487 p = initial_addr;
6488 indx = 0;
6489 loop {
6490 p = p + indx * vectype_size;
6491 vec_dest = *(p);
6492 indx = indx + 1;
6493 }
6494
6495 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 6496 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
6497 then generate the following code, in which the data in each iteration is
6498 obtained by two vector loads, one from the previous iteration, and one
6499 from the current iteration:
6500 p1 = initial_addr;
6501 msq_init = *(floor(p1))
6502 p2 = initial_addr + VS - 1;
6503 realignment_token = call target_builtin;
6504 indx = 0;
6505 loop {
6506 p2 = p2 + indx * vectype_size
6507 lsq = *(floor(p2))
6508 vec_dest = realign_load (msq, lsq, realignment_token)
6509 indx = indx + 1;
6510 msq = lsq;
6511 } */
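  /* E.g. with VS = 16 bytes and initial_addr misaligned by 4:
     floor(p1) = initial_addr - 4 and p2 = initial_addr + 15, so
     floor(p2) = initial_addr + 12; realign_load then combines the
     upper 12 bytes of msq with the lower 4 bytes of lsq to yield
     the 16 bytes starting at initial_addr.  */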
6512
6513 /* If the misalignment remains the same throughout the execution of the
6514 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 6515 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
6516 This can only occur when vectorizing memory accesses in the inner-loop
6517 nested within an outer-loop that is being vectorized. */
6518
d1e4b493 6519 if (nested_in_vect_loop
211bea38 6520 && (TREE_INT_CST_LOW (DR_STEP (dr))
ebfd146a
IR
6521 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6522 {
6523 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6524 compute_in_loop = true;
6525 }
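  /* E.g. if DR advances by 4 bytes per outer-loop iteration while the
     vector size is 16 bytes, the misalignment differs between outer-loop
     iterations, so the realignment data cannot be set up once in the
     preheader.  */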
6526
6527 if ((alignment_support_scheme == dr_explicit_realign_optimized
6528 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 6529 && !compute_in_loop)
ebfd146a
IR
6530 {
6531 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6532 alignment_support_scheme, NULL_TREE,
6533 &at_loop);
6534 if (alignment_support_scheme == dr_explicit_realign_optimized)
6535 {
538dd0b7 6536 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
6537 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6538 size_one_node);
ebfd146a
IR
6539 }
6540 }
6541 else
6542 at_loop = loop;
6543
a1e53f3f
L
6544 if (negative)
6545 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6546
272c6793
RS
6547 if (load_lanes_p)
6548 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6549 else
6550 aggr_type = vectype;
6551
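  /* E.g. for a three-statement interleaving chain of ints loaded as V4SI,
     vec_num * nunits == 12 and AGGR_TYPE is int[12] -- the memory footprint
     of a single load-lanes instruction (such as AArch64 ld3).  */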
ebfd146a
IR
6552 prev_stmt_info = NULL;
6553 for (j = 0; j < ncopies; j++)
b8698a0f 6554 {
272c6793 6555 /* 1. Create the vector or array pointer update chain. */
ebfd146a 6556 if (j == 0)
74bf76ed
JJ
6557 {
6558 bool simd_lane_access_p
6559 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6560 if (simd_lane_access_p
6561 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6562 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6563 && integer_zerop (DR_OFFSET (first_dr))
6564 && integer_zerop (DR_INIT (first_dr))
6565 && alias_sets_conflict_p (get_alias_set (aggr_type),
6566 get_alias_set (DR_REF (first_dr)))
6567 && (alignment_support_scheme == dr_aligned
6568 || alignment_support_scheme == dr_unaligned_supported))
6569 {
6570 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6571 dataref_offset = build_int_cst (reference_alias_ptr_type
6572 (DR_REF (first_dr)), 0);
8928eff3 6573 inv_p = false;
74bf76ed
JJ
6574 }
6575 else
6576 dataref_ptr
6577 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6578 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
6579 simd_lane_access_p, &inv_p,
6580 byte_offset);
74bf76ed
JJ
6581 }
6582 else if (dataref_offset)
6583 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6584 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6585 else
272c6793
RS
6586 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6587 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 6588
0d0293ac 6589 if (grouped_load || slp_perm)
9771b263 6590 dr_chain.create (vec_num);
5ce1ee7f 6591
272c6793 6592 if (load_lanes_p)
ebfd146a 6593 {
272c6793
RS
6594 tree vec_array;
6595
6596 vec_array = create_vector_array (vectype, vec_num);
6597
6598 /* Emit:
6599 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6600 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6601 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6602 gimple_call_set_lhs (new_stmt, vec_array);
6603 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 6604
272c6793
RS
6605 /* Extract each vector into an SSA_NAME. */
6606 for (i = 0; i < vec_num; i++)
ebfd146a 6607 {
272c6793
RS
6608 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6609 vec_array, i);
9771b263 6610 dr_chain.quick_push (new_temp);
272c6793
RS
6611 }
6612
6613 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 6614 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
6615 }
6616 else
6617 {
6618 for (i = 0; i < vec_num; i++)
6619 {
6620 if (i > 0)
6621 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6622 stmt, NULL_TREE);
6623
6624 /* 2. Create the vector-load in the loop. */
6625 switch (alignment_support_scheme)
6626 {
6627 case dr_aligned:
6628 case dr_unaligned_supported:
be1ac4ec 6629 {
644ffefd
MJ
6630 unsigned int align, misalign;
6631
272c6793
RS
6632 data_ref
6633 = build2 (MEM_REF, vectype, dataref_ptr,
74bf76ed
JJ
6634 dataref_offset
6635 ? dataref_offset
6636 : build_int_cst (reference_alias_ptr_type
6637 (DR_REF (first_dr)), 0));
644ffefd 6638 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
6639 if (alignment_support_scheme == dr_aligned)
6640 {
6641 gcc_assert (aligned_access_p (first_dr));
644ffefd 6642 misalign = 0;
272c6793
RS
6643 }
6644 else if (DR_MISALIGNMENT (first_dr) == -1)
6645 {
6646 TREE_TYPE (data_ref)
6647 = build_aligned_type (TREE_TYPE (data_ref),
6648 TYPE_ALIGN (elem_type));
644ffefd
MJ
6649 align = TYPE_ALIGN_UNIT (elem_type);
6650 misalign = 0;
272c6793
RS
6651 }
6652 else
6653 {
6654 TREE_TYPE (data_ref)
6655 = build_aligned_type (TREE_TYPE (data_ref),
6656 TYPE_ALIGN (elem_type));
644ffefd 6657 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6658 }
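	      /* E.g. for V4SI: dr_aligned records align == 16 with
		 misalign == 0; a known misalignment of 4 bytes records
		 align == 16, misalign == 4; an unknown misalignment
		 falls back to the 4-byte alignment of the elements.  */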
74bf76ed
JJ
6659 if (dataref_offset == NULL_TREE)
6660 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6661 align, misalign);
272c6793 6662 break;
be1ac4ec 6663 }
272c6793 6664 case dr_explicit_realign:
267d3070 6665 {
272c6793 6666 tree ptr, bump;
272c6793 6667
d88981fc 6668 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
6669
6670 if (compute_in_loop)
6671 msq = vect_setup_realignment (first_stmt, gsi,
6672 &realignment_token,
6673 dr_explicit_realign,
6674 dataref_ptr, NULL);
6675
b731b390 6676 ptr = copy_ssa_name (dataref_ptr);
0d0e4a03
JJ
6677 new_stmt = gimple_build_assign
6678 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
6679 build_int_cst
6680 (TREE_TYPE (dataref_ptr),
6681 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
6682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
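	      /* E.g. for a 16-byte vector the mask is -16, so PTR is
		 DATAREF_PTR rounded down to a 16-byte boundary -- the
		 floor(p1) of the realignment scheme described above.  */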
6683 data_ref
6684 = build2 (MEM_REF, vectype, ptr,
6685 build_int_cst (reference_alias_ptr_type
6686 (DR_REF (first_dr)), 0));
6687 vec_dest = vect_create_destination_var (scalar_dest,
6688 vectype);
6689 new_stmt = gimple_build_assign (vec_dest, data_ref);
6690 new_temp = make_ssa_name (vec_dest, new_stmt);
6691 gimple_assign_set_lhs (new_stmt, new_temp);
6692 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6693 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6694 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6695 msq = new_temp;
6696
d88981fc 6697 bump = size_binop (MULT_EXPR, vs,
7b7b1813 6698 TYPE_SIZE_UNIT (elem_type));
d88981fc 6699 bump = size_binop (MINUS_EXPR, bump, size_one_node);
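	      /* E.g. for V4SI, BUMP == 4 * 4 - 1 == 15, so PTR + BUMP is
		 the last byte of the unaligned vector; masking it below
		 yields the second aligned chunk (lsq).  */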
272c6793 6700 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
6701 new_stmt = gimple_build_assign
6702 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793
RS
6703 build_int_cst
6704 (TREE_TYPE (ptr),
6705 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
070ecdfd 6706 ptr = copy_ssa_name (dataref_ptr, new_stmt);
272c6793
RS
6707 gimple_assign_set_lhs (new_stmt, ptr);
6708 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6709 data_ref
6710 = build2 (MEM_REF, vectype, ptr,
6711 build_int_cst (reference_alias_ptr_type
6712 (DR_REF (first_dr)), 0));
6713 break;
267d3070 6714 }
272c6793 6715 case dr_explicit_realign_optimized:
b731b390 6716 new_temp = copy_ssa_name (dataref_ptr);
0d0e4a03
JJ
6717 new_stmt = gimple_build_assign
6718 (new_temp, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
6719 build_int_cst
6720 (TREE_TYPE (dataref_ptr),
6721 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
272c6793
RS
6722 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6723 data_ref
6724 = build2 (MEM_REF, vectype, new_temp,
6725 build_int_cst (reference_alias_ptr_type
6726 (DR_REF (first_dr)), 0));
6727 break;
6728 default:
6729 gcc_unreachable ();
6730 }
ebfd146a 6731 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 6732 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
6733 new_temp = make_ssa_name (vec_dest, new_stmt);
6734 gimple_assign_set_lhs (new_stmt, new_temp);
6735 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6736
272c6793
RS
6737 /* 3. Handle explicit realignment if necessary/supported.
6738 Create in loop:
6739 vec_dest = realign_load (msq, lsq, realignment_token) */
6740 if (alignment_support_scheme == dr_explicit_realign_optimized
6741 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 6742 {
272c6793
RS
6743 lsq = gimple_assign_lhs (new_stmt);
6744 if (!realignment_token)
6745 realignment_token = dataref_ptr;
6746 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
6747 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6748 msq, lsq, realignment_token);
272c6793
RS
6749 new_temp = make_ssa_name (vec_dest, new_stmt);
6750 gimple_assign_set_lhs (new_stmt, new_temp);
6751 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6752
6753 if (alignment_support_scheme == dr_explicit_realign_optimized)
6754 {
6755 gcc_assert (phi);
6756 if (i == vec_num - 1 && j == ncopies - 1)
6757 add_phi_arg (phi, lsq,
6758 loop_latch_edge (containing_loop),
9e227d60 6759 UNKNOWN_LOCATION);
272c6793
RS
6760 msq = lsq;
6761 }
ebfd146a 6762 }
ebfd146a 6763
59fd17e3
RB
6764 /* 4. Handle invariant-load. */
6765 if (inv_p && !bb_vinfo)
6766 {
59fd17e3 6767 gcc_assert (!grouped_load);
d1417442
JJ
6768 /* If we have versioned for aliasing or the loop doesn't
6769 have any data dependencies that would preclude this,
6770 then we are sure this is a loop invariant load and
6771 thus we can insert it on the preheader edge. */
6772 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6773 && !nested_in_vect_loop
6b916b36 6774 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
6775 {
6776 if (dump_enabled_p ())
6777 {
6778 dump_printf_loc (MSG_NOTE, vect_location,
6779 "hoisting out of the vectorized "
6780 "loop: ");
6781 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 6782 }
b731b390 6783 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
6784 gsi_insert_on_edge_immediate
6785 (loop_preheader_edge (loop),
6786 gimple_build_assign (tem,
6787 unshare_expr
6788 (gimple_assign_rhs1 (stmt))));
6789 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6790 }
6791 else
6792 {
6793 gimple_stmt_iterator gsi2 = *gsi;
6794 gsi_next (&gsi2);
6795 new_temp = vect_init_vector (stmt, scalar_dest,
6796 vectype, &gsi2);
6797 }
59fd17e3 6798 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0
RB
6799 set_vinfo_for_stmt (new_stmt,
6800 new_stmt_vec_info (new_stmt, loop_vinfo,
6801 bb_vinfo));
59fd17e3
RB
6802 }
6803
272c6793
RS
6804 if (negative)
6805 {
aec7ae7d
JJ
6806 tree perm_mask = perm_mask_for_reverse (vectype);
6807 new_temp = permute_vec_elements (new_temp, new_temp,
6808 perm_mask, stmt, gsi);
ebfd146a
IR
6809 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6810 }
267d3070 6811
272c6793 6812 /* Collect vector loads and later create their permutation in
0d0293ac
MM
6813 vect_transform_grouped_load (). */
6814 if (grouped_load || slp_perm)
9771b263 6815 dr_chain.quick_push (new_temp);
267d3070 6816
272c6793
RS
6817 /* Store vector loads in the corresponding SLP_NODE. */
6818 if (slp && !slp_perm)
9771b263 6819 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 6820 }
9b999e8c
RB
6821 /* Bump the vector pointer to account for a gap or for excess
6822 elements loaded for a permuted SLP load. */
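      /* E.g. with int elements and group_gap_adj == 2, the pointer is
	 advanced by an extra 2 * 4 == 8 bytes so that the next iteration
	 starts at the correct element.  */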
6823 if (group_gap_adj != 0)
a64b9c26 6824 {
9b999e8c
RB
6825 bool ovf;
6826 tree bump
6827 = wide_int_to_tree (sizetype,
6828 wi::smul (TYPE_SIZE_UNIT (elem_type),
6829 group_gap_adj, &ovf));
a64b9c26
RB
6830 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6831 stmt, bump);
6832 }
ebfd146a
IR
6833 }
6834
6835 if (slp && !slp_perm)
6836 continue;
6837
6838 if (slp_perm)
6839 {
01d8bf07 6840 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
ebfd146a
IR
6841 slp_node_instance, false))
6842 {
9771b263 6843 dr_chain.release ();
ebfd146a
IR
6844 return false;
6845 }
6846 }
6847 else
6848 {
0d0293ac 6849 if (grouped_load)
ebfd146a 6850 {
272c6793 6851 if (!load_lanes_p)
0d0293ac 6852 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 6853 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
6854 }
6855 else
6856 {
6857 if (j == 0)
6858 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6859 else
6860 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6861 prev_stmt_info = vinfo_for_stmt (new_stmt);
6862 }
6863 }
9771b263 6864 dr_chain.release ();
ebfd146a
IR
6865 }
6866
ebfd146a
IR
6867 return true;
6868}
6869
6870/* Function vect_is_simple_cond.
b8698a0f 6871
ebfd146a
IR
6872 Input:
 6873	   LOOP_VINFO/BB_VINFO - the vect info of the loop or basic block being vectorized.
 6874	   COND - Condition that is checked for simple use.
6875
e9e1d143
RG
6876 Output:
6877 *COMP_VECTYPE - the vector type for the comparison.
6878
ebfd146a
IR
6879 Returns whether a COND can be vectorized. Checks whether
 6880	   condition operands are supportable using vect_is_simple_use. */
6881
87aab9b2 6882static bool
24ee1384
IR
6883vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6884 bb_vec_info bb_vinfo, tree *comp_vectype)
ebfd146a
IR
6885{
6886 tree lhs, rhs;
6887 tree def;
6888 enum vect_def_type dt;
e9e1d143 6889 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a
IR
6890
6891 if (!COMPARISON_CLASS_P (cond))
6892 return false;
6893
6894 lhs = TREE_OPERAND (cond, 0);
6895 rhs = TREE_OPERAND (cond, 1);
6896
6897 if (TREE_CODE (lhs) == SSA_NAME)
6898 {
6899 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
24ee1384
IR
6900 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6901 &lhs_def_stmt, &def, &dt, &vectype1))
ebfd146a
IR
6902 return false;
6903 }
6904 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6905 && TREE_CODE (lhs) != FIXED_CST)
6906 return false;
6907
6908 if (TREE_CODE (rhs) == SSA_NAME)
6909 {
6910 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
24ee1384
IR
6911 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6912 &rhs_def_stmt, &def, &dt, &vectype2))
ebfd146a
IR
6913 return false;
6914 }
f7e531cf 6915 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
ebfd146a
IR
6916 && TREE_CODE (rhs) != FIXED_CST)
6917 return false;
6918
e9e1d143 6919 *comp_vectype = vectype1 ? vectype1 : vectype2;
ebfd146a
IR
6920 return true;
6921}
6922
6923/* vectorizable_condition.
6924
b8698a0f
L
 6925	   Check if STMT is a conditional modify expression that can be vectorized.
6926 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6927 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
6928 at GSI.
6929
 6930	   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 6931	   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 6932	   the else clause if it is 2).
ebfd146a
IR
6933
6934 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6935
4bbe8262 6936bool
ebfd146a 6937vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f7e531cf
IR
6938 gimple *vec_stmt, tree reduc_def, int reduc_index,
6939 slp_tree slp_node)
ebfd146a
IR
6940{
6941 tree scalar_dest = NULL_TREE;
6942 tree vec_dest = NULL_TREE;
ebfd146a
IR
6943 tree cond_expr, then_clause, else_clause;
6944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6945 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
df11cc78 6946 tree comp_vectype = NULL_TREE;
ff802fa1
IR
6947 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6948 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
ebfd146a
IR
6949 tree vec_compare, vec_cond_expr;
6950 tree new_temp;
6951 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 6952 tree def;
a855b1b1 6953 enum vect_def_type dt, dts[4];
ebfd146a 6954 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f7e531cf 6955 int ncopies;
ebfd146a 6956 enum tree_code code;
a855b1b1 6957 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
6958 int i, j;
6959 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
6960 vec<tree> vec_oprnds0 = vNULL;
6961 vec<tree> vec_oprnds1 = vNULL;
6962 vec<tree> vec_oprnds2 = vNULL;
6963 vec<tree> vec_oprnds3 = vNULL;
74946978 6964 tree vec_cmp_type;
b8698a0f 6965
f7e531cf
IR
6966 if (slp_node || PURE_SLP_STMT (stmt_info))
6967 ncopies = 1;
6968 else
6969 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
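  /* E.g. with a vectorization factor of 8 and V4SI vectors, ncopies == 2:
     two vector COND_EXPRs are generated for each scalar COND_EXPR.  */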
437f4a00 6970
ebfd146a 6971 gcc_assert (ncopies >= 1);
a855b1b1 6972 if (reduc_index && ncopies > 1)
ebfd146a
IR
6973 return false; /* FORNOW */
6974
f7e531cf
IR
6975 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6976 return false;
6977
6978 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6979 return false;
6980
4bbe8262
IR
6981 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6982 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6983 && reduc_def))
ebfd146a
IR
6984 return false;
6985
ebfd146a 6986 /* FORNOW: not yet supported. */
b8698a0f 6987 if (STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 6988 {
73fbfcad 6989 if (dump_enabled_p ())
78c60e3d 6990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6991 "value used after loop.\n");
ebfd146a
IR
6992 return false;
6993 }
6994
6995 /* Is vectorizable conditional operation? */
6996 if (!is_gimple_assign (stmt))
6997 return false;
6998
6999 code = gimple_assign_rhs_code (stmt);
7000
7001 if (code != COND_EXPR)
7002 return false;
7003
4e71066d
RG
7004 cond_expr = gimple_assign_rhs1 (stmt);
7005 then_clause = gimple_assign_rhs2 (stmt);
7006 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 7007
24ee1384
IR
7008 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7009 &comp_vectype)
e9e1d143 7010 || !comp_vectype)
ebfd146a
IR
7011 return false;
7012
7013 if (TREE_CODE (then_clause) == SSA_NAME)
7014 {
7015 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
24ee1384 7016 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
7017 &then_def_stmt, &def, &dt))
7018 return false;
7019 }
b8698a0f 7020 else if (TREE_CODE (then_clause) != INTEGER_CST
ebfd146a
IR
7021 && TREE_CODE (then_clause) != REAL_CST
7022 && TREE_CODE (then_clause) != FIXED_CST)
7023 return false;
7024
7025 if (TREE_CODE (else_clause) == SSA_NAME)
7026 {
7027 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
24ee1384 7028 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
ebfd146a
IR
7029 &else_def_stmt, &def, &dt))
7030 return false;
7031 }
b8698a0f 7032 else if (TREE_CODE (else_clause) != INTEGER_CST
ebfd146a
IR
7033 && TREE_CODE (else_clause) != REAL_CST
7034 && TREE_CODE (else_clause) != FIXED_CST)
7035 return false;
7036
74946978
MP
7037 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
 7038	  /* The result of a vector comparison should be a signed integer type. */
7039 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7040 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
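  /* E.g. for V4SF operands CMP_TYPE is a signed 32-bit integer type and
     VEC_CMP_TYPE is V4SI: each comparison lane produces an all-ones or
     all-zeros mask element for the VEC_COND_EXPR built below.  */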
7041 if (vec_cmp_type == NULL_TREE)
7042 return false;
784fb9b3 7043
b8698a0f 7044 if (!vec_stmt)
ebfd146a
IR
7045 {
7046 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 7047 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
7048 }
7049
f7e531cf
IR
7050 /* Transform. */
7051
7052 if (!slp_node)
7053 {
9771b263
DN
7054 vec_oprnds0.create (1);
7055 vec_oprnds1.create (1);
7056 vec_oprnds2.create (1);
7057 vec_oprnds3.create (1);
f7e531cf 7058 }
ebfd146a
IR
7059
7060 /* Handle def. */
7061 scalar_dest = gimple_assign_lhs (stmt);
7062 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7063
7064 /* Handle cond expr. */
a855b1b1
MM
7065 for (j = 0; j < ncopies; j++)
7066 {
538dd0b7 7067 gassign *new_stmt = NULL;
a855b1b1
MM
7068 if (j == 0)
7069 {
f7e531cf
IR
7070 if (slp_node)
7071 {
00f96dc9
TS
7072 auto_vec<tree, 4> ops;
7073 auto_vec<vec<tree>, 4> vec_defs;
9771b263 7074
9771b263
DN
7075 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7076 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7077 ops.safe_push (then_clause);
7078 ops.safe_push (else_clause);
f7e531cf 7079 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
37b5ec8f
JJ
7080 vec_oprnds3 = vec_defs.pop ();
7081 vec_oprnds2 = vec_defs.pop ();
7082 vec_oprnds1 = vec_defs.pop ();
7083 vec_oprnds0 = vec_defs.pop ();
f7e531cf 7084
9771b263
DN
7085 ops.release ();
7086 vec_defs.release ();
f7e531cf
IR
7087 }
7088 else
7089 {
7090 gimple gtemp;
7091 vec_cond_lhs =
a855b1b1
MM
7092 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7093 stmt, NULL);
24ee1384
IR
7094 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7095 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f7e531cf
IR
7096
7097 vec_cond_rhs =
7098 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7099 stmt, NULL);
24ee1384
IR
7100 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7101 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f7e531cf
IR
7102 if (reduc_index == 1)
7103 vec_then_clause = reduc_def;
7104 else
7105 {
7106 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7107 stmt, NULL);
24ee1384 7108 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f7e531cf
IR
7109 NULL, &gtemp, &def, &dts[2]);
7110 }
7111 if (reduc_index == 2)
7112 vec_else_clause = reduc_def;
7113 else
7114 {
7115 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
a855b1b1 7116 stmt, NULL);
24ee1384 7117 vect_is_simple_use (else_clause, stmt, loop_vinfo,
a855b1b1 7118 NULL, &gtemp, &def, &dts[3]);
f7e531cf 7119 }
a855b1b1
MM
7120 }
7121 }
7122 else
7123 {
f7e531cf 7124 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
9771b263 7125 vec_oprnds0.pop ());
f7e531cf 7126 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
9771b263 7127 vec_oprnds1.pop ());
a855b1b1 7128 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 7129 vec_oprnds2.pop ());
a855b1b1 7130 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 7131 vec_oprnds3.pop ());
f7e531cf
IR
7132 }
7133
7134 if (!slp_node)
7135 {
9771b263
DN
7136 vec_oprnds0.quick_push (vec_cond_lhs);
7137 vec_oprnds1.quick_push (vec_cond_rhs);
7138 vec_oprnds2.quick_push (vec_then_clause);
7139 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
7140 }
7141
9dc3f7de 7142 /* Arguments are ready. Create the new vector stmt. */
9771b263 7143 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 7144 {
9771b263
DN
7145 vec_cond_rhs = vec_oprnds1[i];
7146 vec_then_clause = vec_oprnds2[i];
7147 vec_else_clause = vec_oprnds3[i];
a855b1b1 7148
784fb9b3
JJ
7149 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7150 vec_cond_lhs, vec_cond_rhs);
f7e531cf
IR
7151 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7152 vec_compare, vec_then_clause, vec_else_clause);
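	  /* E.g. this builds vx = VEC_COND_EXPR <va < vb, vt, ve>, which
	     selects vt[i] where the mask element is all-ones and ve[i]
	     where it is all-zeros.  */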
a855b1b1 7153
f7e531cf
IR
7154 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7155 new_temp = make_ssa_name (vec_dest, new_stmt);
7156 gimple_assign_set_lhs (new_stmt, new_temp);
7157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7158 if (slp_node)
9771b263 7159 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
7160 }
7161
7162 if (slp_node)
7163 continue;
7164
7165 if (j == 0)
7166 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7167 else
7168 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7169
7170 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 7171 }
b8698a0f 7172
9771b263
DN
7173 vec_oprnds0.release ();
7174 vec_oprnds1.release ();
7175 vec_oprnds2.release ();
7176 vec_oprnds3.release ();
f7e531cf 7177
ebfd146a
IR
7178 return true;
7179}
7180
7181
8644a673 7182/* Make sure the statement is vectorizable. */
ebfd146a
IR
7183
7184bool
a70d6342 7185vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 7186{
8644a673 7187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 7188 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 7189 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 7190 bool ok;
a70d6342 7191 tree scalar_type, vectype;
363477c0
JJ
7192 gimple pattern_stmt;
7193 gimple_seq pattern_def_seq;
ebfd146a 7194
73fbfcad 7195 if (dump_enabled_p ())
ebfd146a 7196 {
78c60e3d
SS
7197 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7198 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 7199 }
ebfd146a 7200
1825a1f3 7201 if (gimple_has_volatile_ops (stmt))
b8698a0f 7202 {
73fbfcad 7203 if (dump_enabled_p ())
78c60e3d 7204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7205 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
7206
7207 return false;
7208 }
b8698a0f
L
7209
7210 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
7211 to include:
7212 - the COND_EXPR which is the loop exit condition
7213 - any LABEL_EXPRs in the loop
b8698a0f 7214 - computations that are used only for array indexing or loop control.
8644a673 7215 In basic blocks we only analyze statements that are a part of some SLP
83197f37 7216 instance, therefore, all the statements are relevant.
ebfd146a 7217
d092494c 7218 Pattern statement needs to be analyzed instead of the original statement
83197f37 7219 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
 7220	   statements.  In basic blocks we are called from some SLP instance
 7221	   traversal; there we don't analyze pattern stmts separately, as the
 7222	   pattern stmts will already be part of the SLP instance. */
83197f37
IR
7223
7224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 7225 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 7226 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 7227 {
9d5e7640 7228 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 7229 && pattern_stmt
9d5e7640
IR
7230 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7231 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7232 {
83197f37 7233 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
7234 stmt = pattern_stmt;
7235 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 7236 if (dump_enabled_p ())
9d5e7640 7237 {
78c60e3d
SS
7238 dump_printf_loc (MSG_NOTE, vect_location,
7239 "==> examining pattern statement: ");
7240 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
7241 }
7242 }
7243 else
7244 {
73fbfcad 7245 if (dump_enabled_p ())
e645e942 7246 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 7247
9d5e7640
IR
7248 return true;
7249 }
8644a673 7250 }
83197f37 7251 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 7252 && node == NULL
83197f37
IR
7253 && pattern_stmt
7254 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7255 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7256 {
7257 /* Analyze PATTERN_STMT too. */
73fbfcad 7258 if (dump_enabled_p ())
83197f37 7259 {
78c60e3d
SS
7260 dump_printf_loc (MSG_NOTE, vect_location,
7261 "==> examining pattern statement: ");
7262 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
7263 }
7264
7265 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7266 return false;
7267 }
ebfd146a 7268
1107f3ae 7269 if (is_pattern_stmt_p (stmt_info)
079c527f 7270 && node == NULL
363477c0 7271 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 7272 {
363477c0 7273 gimple_stmt_iterator si;
1107f3ae 7274
363477c0
JJ
7275 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7276 {
7277 gimple pattern_def_stmt = gsi_stmt (si);
7278 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7279 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7280 {
7281 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 7282 if (dump_enabled_p ())
363477c0 7283 {
78c60e3d
SS
7284 dump_printf_loc (MSG_NOTE, vect_location,
7285 "==> examining pattern def statement: ");
7286 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 7287 }
1107f3ae 7288
363477c0
JJ
7289 if (!vect_analyze_stmt (pattern_def_stmt,
7290 need_to_vectorize, node))
7291 return false;
7292 }
7293 }
7294 }
1107f3ae 7295
8644a673
IR
7296 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7297 {
7298 case vect_internal_def:
7299 break;
ebfd146a 7300
8644a673 7301 case vect_reduction_def:
7c5222ff 7302 case vect_nested_cycle:
14a61437
RB
7303 gcc_assert (!bb_vinfo
7304 && (relevance == vect_used_in_outer
7305 || relevance == vect_used_in_outer_by_reduction
7306 || relevance == vect_used_by_reduction
7307 || relevance == vect_unused_in_scope));
8644a673
IR
7308 break;
7309
7310 case vect_induction_def:
7311 case vect_constant_def:
7312 case vect_external_def:
7313 case vect_unknown_def_type:
7314 default:
7315 gcc_unreachable ();
7316 }
ebfd146a 7317
a70d6342
IR
7318 if (bb_vinfo)
7319 {
7320 gcc_assert (PURE_SLP_STMT (stmt_info));
7321
b690cc0f 7322 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 7323 if (dump_enabled_p ())
a70d6342 7324 {
78c60e3d
SS
7325 dump_printf_loc (MSG_NOTE, vect_location,
7326 "get vectype for scalar type: ");
7327 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 7328 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
7329 }
7330
7331 vectype = get_vectype_for_scalar_type (scalar_type);
7332 if (!vectype)
7333 {
73fbfcad 7334 if (dump_enabled_p ())
a70d6342 7335 {
78c60e3d
SS
7336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7337 "not SLPed: unsupported data-type ");
7338 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7339 scalar_type);
e645e942 7340 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
a70d6342
IR
7341 }
7342 return false;
7343 }
7344
73fbfcad 7345 if (dump_enabled_p ())
a70d6342 7346 {
78c60e3d
SS
7347 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7348 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 7349 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
7350 }
7351
7352 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7353 }
7354
8644a673 7355 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 7356 {
8644a673 7357 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
7358 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7359 || (is_gimple_call (stmt)
7360 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 7361 *need_to_vectorize = true;
ebfd146a
IR
7362 }
7363
b1af7da6
RB
7364 if (PURE_SLP_STMT (stmt_info) && !node)
7365 {
7366 dump_printf_loc (MSG_NOTE, vect_location,
7367 "handled only by SLP analysis\n");
7368 return true;
7369 }
7370
7371 ok = true;
7372 if (!bb_vinfo
7373 && (STMT_VINFO_RELEVANT_P (stmt_info)
7374 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7375 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7376 || vectorizable_conversion (stmt, NULL, NULL, node)
7377 || vectorizable_shift (stmt, NULL, NULL, node)
7378 || vectorizable_operation (stmt, NULL, NULL, node)
7379 || vectorizable_assignment (stmt, NULL, NULL, node)
7380 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7381 || vectorizable_call (stmt, NULL, NULL, node)
7382 || vectorizable_store (stmt, NULL, NULL, node)
7383 || vectorizable_reduction (stmt, NULL, NULL, node)
7384 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7385 else
7386 {
7387 if (bb_vinfo)
7388 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7389 || vectorizable_conversion (stmt, NULL, NULL, node)
7390 || vectorizable_shift (stmt, NULL, NULL, node)
7391 || vectorizable_operation (stmt, NULL, NULL, node)
7392 || vectorizable_assignment (stmt, NULL, NULL, node)
7393 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7394 || vectorizable_call (stmt, NULL, NULL, node)
7395 || vectorizable_store (stmt, NULL, NULL, node)
7396 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7397 }
8644a673
IR
7398
7399 if (!ok)
ebfd146a 7400 {
73fbfcad 7401 if (dump_enabled_p ())
8644a673 7402 {
78c60e3d
SS
7403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7404 "not vectorized: relevant stmt not ");
7405 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7406 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 7407 }
b8698a0f 7408
ebfd146a
IR
7409 return false;
7410 }
7411
a70d6342
IR
7412 if (bb_vinfo)
7413 return true;
7414
8644a673
IR
7415 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7416 need extra handling, except for vectorizable reductions. */
7417 if (STMT_VINFO_LIVE_P (stmt_info)
7418 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7419 ok = vectorizable_live_operation (stmt, NULL, NULL);
ebfd146a 7420
8644a673 7421 if (!ok)
ebfd146a 7422 {
73fbfcad 7423 if (dump_enabled_p ())
8644a673 7424 {
78c60e3d
SS
7425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7426 "not vectorized: live stmt not ");
7427 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7428 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 7429 }
b8698a0f 7430
8644a673 7431 return false;
ebfd146a
IR
7432 }
7433
ebfd146a
IR
7434 return true;
7435}
7436
7437
7438/* Function vect_transform_stmt.
7439
7440 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7441
7442bool
7443vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
0d0293ac 7444 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
7445 slp_instance slp_node_instance)
7446{
7447 bool is_store = false;
7448 gimple vec_stmt = NULL;
7449 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 7450 bool done;
ebfd146a
IR
7451
7452 switch (STMT_VINFO_TYPE (stmt_info))
7453 {
7454 case type_demotion_vec_info_type:
ebfd146a 7455 case type_promotion_vec_info_type:
ebfd146a
IR
7456 case type_conversion_vec_info_type:
7457 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7458 gcc_assert (done);
7459 break;
7460
7461 case induc_vec_info_type:
7462 gcc_assert (!slp_node);
7463 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7464 gcc_assert (done);
7465 break;
7466
9dc3f7de
IR
7467 case shift_vec_info_type:
7468 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7469 gcc_assert (done);
7470 break;
7471
ebfd146a
IR
7472 case op_vec_info_type:
7473 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7474 gcc_assert (done);
7475 break;
7476
7477 case assignment_vec_info_type:
7478 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7479 gcc_assert (done);
7480 break;
7481
7482 case load_vec_info_type:
b8698a0f 7483 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
7484 slp_node_instance);
7485 gcc_assert (done);
7486 break;
7487
7488 case store_vec_info_type:
7489 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7490 gcc_assert (done);
0d0293ac 7491 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
7492 {
7493 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 7494 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 7495	     one are skipped, and their vec_stmt_info shouldn't be freed
7496 meanwhile. */
0d0293ac 7497 *grouped_store = true;
ebfd146a
IR
7498 if (STMT_VINFO_VEC_STMT (stmt_info))
7499 is_store = true;
7500 }
7501 else
7502 is_store = true;
7503 break;
7504
7505 case condition_vec_info_type:
f7e531cf 7506 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
7507 gcc_assert (done);
7508 break;
7509
7510 case call_vec_info_type:
190c2236 7511 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 7512 stmt = gsi_stmt (*gsi);
5ce9450f
JJ
7513 if (is_gimple_call (stmt)
7514 && gimple_call_internal_p (stmt)
7515 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7516 is_store = true;
ebfd146a
IR
7517 break;
7518
0136f8f0
AH
7519 case call_simd_clone_vec_info_type:
7520 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7521 stmt = gsi_stmt (*gsi);
7522 break;
7523
ebfd146a 7524 case reduc_vec_info_type:
b5aeb3bb 7525 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
7526 gcc_assert (done);
7527 break;
7528
7529 default:
7530 if (!STMT_VINFO_LIVE_P (stmt_info))
7531 {
73fbfcad 7532 if (dump_enabled_p ())
78c60e3d 7533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7534 "stmt not supported.\n");
ebfd146a
IR
7535 gcc_unreachable ();
7536 }
7537 }
7538
7539 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7540 is being vectorized, but outside the immediately enclosing loop. */
7541 if (vec_stmt
a70d6342
IR
7542 && STMT_VINFO_LOOP_VINFO (stmt_info)
7543 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7544 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
7545 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7546 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 7547 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 7548 vect_used_in_outer_by_reduction))
ebfd146a 7549 {
a70d6342
IR
7550 struct loop *innerloop = LOOP_VINFO_LOOP (
7551 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
7552 imm_use_iterator imm_iter;
7553 use_operand_p use_p;
7554 tree scalar_dest;
7555 gimple exit_phi;
7556
73fbfcad 7557 if (dump_enabled_p ())
78c60e3d 7558 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7559 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
7560
 7561	      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7562 (to be used when vectorizing outer-loop stmts that use the DEF of
7563 STMT). */
7564 if (gimple_code (stmt) == GIMPLE_PHI)
7565 scalar_dest = PHI_RESULT (stmt);
7566 else
7567 scalar_dest = gimple_assign_lhs (stmt);
7568
7569 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7570 {
7571 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7572 {
7573 exit_phi = USE_STMT (use_p);
7574 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7575 }
7576 }
7577 }
7578
7579 /* Handle stmts whose DEF is used outside the loop-nest that is
7580 being vectorized. */
7581 if (STMT_VINFO_LIVE_P (stmt_info)
7582 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7583 {
7584 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7585 gcc_assert (done);
7586 }
7587
7588 if (vec_stmt)
83197f37 7589 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 7590
b8698a0f 7591 return is_store;
ebfd146a
IR
7592}
7593
7594
b8698a0f 7595/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
7596 stmt_vec_info. */
7597
7598void
7599vect_remove_stores (gimple first_stmt)
7600{
7601 gimple next = first_stmt;
7602 gimple tmp;
7603 gimple_stmt_iterator next_si;
7604
7605 while (next)
7606 {
78048b1c
JJ
7607 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7608
7609 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7610 if (is_pattern_stmt_p (stmt_info))
7611 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
7612 /* Free the attached stmt_vec_info and remove the stmt. */
7613 next_si = gsi_for_stmt (next);
3d3f2249 7614 unlink_stmt_vdef (next);
ebfd146a 7615 gsi_remove (&next_si, true);
3d3f2249 7616 release_defs (next);
ebfd146a
IR
7617 free_stmt_vec_info (next);
7618 next = tmp;
7619 }
7620}
7621
7622
7623/* Function new_stmt_vec_info.
7624
7625 Create and initialize a new stmt_vec_info struct for STMT. */
7626
7627stmt_vec_info
b8698a0f 7628new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
a70d6342 7629 bb_vec_info bb_vinfo)
ebfd146a
IR
7630{
7631 stmt_vec_info res;
7632 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7633
7634 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7635 STMT_VINFO_STMT (res) = stmt;
7636 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
a70d6342 7637 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
8644a673 7638 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
7639 STMT_VINFO_LIVE_P (res) = false;
7640 STMT_VINFO_VECTYPE (res) = NULL;
7641 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 7642 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
7643 STMT_VINFO_IN_PATTERN_P (res) = false;
7644 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 7645 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a
IR
7646 STMT_VINFO_DATA_REF (res) = NULL;
7647
7648 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7649 STMT_VINFO_DR_OFFSET (res) = NULL;
7650 STMT_VINFO_DR_INIT (res) = NULL;
7651 STMT_VINFO_DR_STEP (res) = NULL;
7652 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7653
7654 if (gimple_code (stmt) == GIMPLE_PHI
7655 && is_loop_header_bb_p (gimple_bb (stmt)))
7656 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7657 else
8644a673
IR
7658 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7659
9771b263 7660 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 7661 STMT_SLP_TYPE (res) = loop_vect;
e14c1050
IR
7662 GROUP_FIRST_ELEMENT (res) = NULL;
7663 GROUP_NEXT_ELEMENT (res) = NULL;
7664 GROUP_SIZE (res) = 0;
7665 GROUP_STORE_COUNT (res) = 0;
7666 GROUP_GAP (res) = 0;
7667 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
7668
7669 return res;
7670}
7671
7672
7673/* Create a hash table for stmt_vec_info. */
7674
7675void
7676init_stmt_vec_info_vec (void)
7677{
9771b263
DN
7678 gcc_assert (!stmt_vec_info_vec.exists ());
7679 stmt_vec_info_vec.create (50);
ebfd146a
IR
7680}
7681
7682
7683/* Free hash table for stmt_vec_info. */
7684
7685void
7686free_stmt_vec_info_vec (void)
7687{
93675444
JJ
7688 unsigned int i;
7689 vec_void_p info;
7690 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7691 if (info != NULL)
7692 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
9771b263
DN
7693 gcc_assert (stmt_vec_info_vec.exists ());
7694 stmt_vec_info_vec.release ();
ebfd146a
IR
7695}
7696
7697
7698/* Free stmt vectorization related info. */
7699
7700void
7701free_stmt_vec_info (gimple stmt)
7702{
7703 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7704
7705 if (!stmt_info)
7706 return;
7707
78048b1c
JJ
7708 /* Check if this statement has a related "pattern stmt"
7709 (introduced by the vectorizer during the pattern recognition
7710 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7711 too. */
7712 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7713 {
7714 stmt_vec_info patt_info
7715 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7716 if (patt_info)
7717 {
363477c0 7718 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
f0281fde
RB
7719 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7720 gimple_set_bb (patt_stmt, NULL);
7721 tree lhs = gimple_get_lhs (patt_stmt);
7722 if (TREE_CODE (lhs) == SSA_NAME)
7723 release_ssa_name (lhs);
363477c0
JJ
7724 if (seq)
7725 {
7726 gimple_stmt_iterator si;
7727 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde
RB
7728 {
7729 gimple seq_stmt = gsi_stmt (si);
7730 gimple_set_bb (seq_stmt, NULL);
 7731		      lhs = gimple_get_lhs (seq_stmt);
7732 if (TREE_CODE (lhs) == SSA_NAME)
7733 release_ssa_name (lhs);
7734 free_stmt_vec_info (seq_stmt);
7735 }
363477c0 7736 }
f0281fde 7737 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
7738 }
7739 }
7740
9771b263 7741 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 7742 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
7743 set_vinfo_for_stmt (stmt, NULL);
7744 free (stmt_info);
7745}
7746
7747
bb67d9c7 7748/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 7749
bb67d9c7 7750 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
7751 by the target. */
7752
bb67d9c7
RG
7753static tree
7754get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a 7755{
ef4bddc2
RS
7756 machine_mode inner_mode = TYPE_MODE (scalar_type);
7757 machine_mode simd_mode;
2f816591 7758 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
7759 int nunits;
7760 tree vectype;
7761
cc4b5170 7762 if (nbytes == 0)
ebfd146a
IR
7763 return NULL_TREE;
7764
48f2e373
RB
7765 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7766 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7767 return NULL_TREE;
7768
7b7b1813
RG
7769 /* For vector types of elements whose mode precision doesn't
 7770	     match their type's precision we use an element type of mode
7771 precision. The vectorization routines will have to make sure
48f2e373
RB
7772 they support the proper result truncation/extension.
7773 We also make sure to build vector types with INTEGER_TYPE
7774 component type only. */
6d7971b8 7775 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
7776 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7777 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
7778 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7779 TYPE_UNSIGNED (scalar_type));
6d7971b8 7780
ccbf5bb4
RG
7781 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7782 When the component mode passes the above test simply use a type
7783 corresponding to that mode. The theory is that any use that
7784 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 7785 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 7786 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
7787 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7788
7789 /* We can't build a vector type of elements with alignment bigger than
7790 their size. */
dfc2e2ac 7791 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
7792 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7793 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 7794
dfc2e2ac
RB
 7795	  /* If we fell back to using the mode, fail if there was
 7796	     no scalar type for it.  */
7797 if (scalar_type == NULL_TREE)
7798 return NULL_TREE;
7799
bb67d9c7
RG
 7800	  /* If no size was supplied, use the mode the target prefers.  Otherwise
 7801	     look up a vector mode of the specified size.  */
7802 if (size == 0)
7803 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7804 else
7805 simd_mode = mode_for_vector (inner_mode, size / nbytes);
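  /* E.g. INNER_MODE == SImode with SIZE == 16 yields V4SImode; with
     SIZE == 0 the target chooses (an AVX2 target prefers V8SImode).  */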
cc4b5170
RG
7806 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7807 if (nunits <= 1)
7808 return NULL_TREE;
ebfd146a
IR
7809
7810 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
7811
7812 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7813 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 7814 return NULL_TREE;
ebfd146a
IR
7815
7816 return vectype;
7817}
7818
bb67d9c7
RG
7819unsigned int current_vector_size;
7820
7821/* Function get_vectype_for_scalar_type.
7822
7823 Returns the vector type corresponding to SCALAR_TYPE as supported
7824 by the target. */
7825
7826tree
7827get_vectype_for_scalar_type (tree scalar_type)
7828{
7829 tree vectype;
7830 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7831 current_vector_size);
7832 if (vectype
7833 && current_vector_size == 0)
7834 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7835 return vectype;
7836}
7837
b690cc0f
RG
7838/* Function get_same_sized_vectype
7839
7840 Returns a vector type corresponding to SCALAR_TYPE of size
7841 VECTOR_TYPE if supported by the target. */
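/* E.g. for SCALAR_TYPE int and a V4SF VECTOR_TYPE this returns V4SI.  */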
7842
7843tree
bb67d9c7 7844get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 7845{
bb67d9c7
RG
7846 return get_vectype_for_scalar_type_and_size
7847 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
7848}
7849
ebfd146a
IR
7850/* Function vect_is_simple_use.
7851
7852 Input:
a70d6342
IR
7853 LOOP_VINFO - the vect info of the loop that is being vectorized.
7854 BB_VINFO - the vect info of the basic block that is being vectorized.
24ee1384 7855 OPERAND - operand of STMT in the loop or bb.
ebfd146a
IR
7856 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7857
7858 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 7859 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 7860 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 7861 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
7862 is the case in reduction/induction computations).
7863 For basic blocks, supportable operands are constants and bb invariants.
7864 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
7865
7866bool
24ee1384 7867vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
a70d6342 7868 bb_vec_info bb_vinfo, gimple *def_stmt,
ebfd146a 7869 tree *def, enum vect_def_type *dt)
b8698a0f 7870{
ebfd146a
IR
7871 *def_stmt = NULL;
7872 *def = NULL_TREE;
3fc356dc 7873 *dt = vect_unknown_def_type;
b8698a0f 7874
73fbfcad 7875 if (dump_enabled_p ())
ebfd146a 7876 {
78c60e3d
SS
7877 dump_printf_loc (MSG_NOTE, vect_location,
7878 "vect_is_simple_use: operand ");
7879 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 7880 dump_printf (MSG_NOTE, "\n");
ebfd146a 7881 }
b8698a0f 7882
b758f602 7883 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
7884 {
7885 *dt = vect_constant_def;
7886 return true;
7887 }
b8698a0f 7888
ebfd146a
IR
7889 if (is_gimple_min_invariant (operand))
7890 {
7891 *def = operand;
8644a673 7892 *dt = vect_external_def;
ebfd146a
IR
7893 return true;
7894 }
7895
ebfd146a
IR
7896 if (TREE_CODE (operand) != SSA_NAME)
7897 {
73fbfcad 7898 if (dump_enabled_p ())
78c60e3d 7899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7900 "not ssa-name.\n");
ebfd146a
IR
7901 return false;
7902 }
b8698a0f 7903
3fc356dc 7904 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 7905 {
3fc356dc
RB
7906 *def = operand;
7907 *dt = vect_external_def;
7908 return true;
ebfd146a
IR
7909 }
7910
3fc356dc 7911 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 7912 if (dump_enabled_p ())
ebfd146a 7913 {
78c60e3d
SS
7914 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7915 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
7916 }
7917
3fc356dc
RB
7918 basic_block bb = gimple_bb (*def_stmt);
7919 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
7920 || (bb_vinfo
7921 && (bb != BB_VINFO_BB (bb_vinfo)
7922 || gimple_code (*def_stmt) == GIMPLE_PHI)))
8644a673 7923 *dt = vect_external_def;
ebfd146a
IR
7924 else
7925 {
3fc356dc
RB
7926 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
7927 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
90dd6e3d
RB
7928 *dt = vect_external_def;
7929 else
7930 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
7931 }
7932
2e8ab70c
RB
7933 if (dump_enabled_p ())
7934 {
7935 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
7936 switch (*dt)
7937 {
7938 case vect_uninitialized_def:
7939 dump_printf (MSG_NOTE, "uninitialized\n");
7940 break;
7941 case vect_constant_def:
7942 dump_printf (MSG_NOTE, "constant\n");
7943 break;
7944 case vect_external_def:
7945 dump_printf (MSG_NOTE, "external\n");
7946 break;
7947 case vect_internal_def:
7948 dump_printf (MSG_NOTE, "internal\n");
7949 break;
7950 case vect_induction_def:
7951 dump_printf (MSG_NOTE, "induction\n");
7952 break;
7953 case vect_reduction_def:
7954 dump_printf (MSG_NOTE, "reduction\n");
7955 break;
7956 case vect_double_reduction_def:
7957 dump_printf (MSG_NOTE, "double reduction\n");
7958 break;
7959 case vect_nested_cycle:
7960 dump_printf (MSG_NOTE, "nested cycle\n");
7961 break;
7962 case vect_unknown_def_type:
7963 dump_printf (MSG_NOTE, "unknown\n");
7964 break;
7965 }
7966 }
7967
24ee1384
IR
7968 if (*dt == vect_unknown_def_type
7969 || (stmt
7970 && *dt == vect_double_reduction_def
7971 && gimple_code (stmt) != GIMPLE_PHI))
ebfd146a 7972 {
73fbfcad 7973 if (dump_enabled_p ())
78c60e3d 7974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7975 "Unsupported pattern.\n");
ebfd146a
IR
7976 return false;
7977 }
7978
ebfd146a
IR
7979 switch (gimple_code (*def_stmt))
7980 {
7981 case GIMPLE_PHI:
7982 *def = gimple_phi_result (*def_stmt);
7983 break;
7984
7985 case GIMPLE_ASSIGN:
7986 *def = gimple_assign_lhs (*def_stmt);
7987 break;
7988
7989 case GIMPLE_CALL:
7990 *def = gimple_call_lhs (*def_stmt);
7991 if (*def != NULL)
7992 break;
7993 /* FALLTHRU */
7994 default:
73fbfcad 7995 if (dump_enabled_p ())
78c60e3d 7996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7997 "unsupported defining stmt:\n");
ebfd146a
IR
7998 return false;
7999 }
8000
8001 return true;
8002}
8003
b690cc0f
RG
8004/* Function vect_is_simple_use_1.
8005
 8006	   Same as vect_is_simple_use, but also determines the vector operand
8007 type of OPERAND and stores it to *VECTYPE. If the definition of
8008 OPERAND is vect_uninitialized_def, vect_constant_def or
8009 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8010 is responsible to compute the best suited vector type for the
8011 scalar operand. */
8012
8013bool
24ee1384 8014vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b690cc0f
RG
8015 bb_vec_info bb_vinfo, gimple *def_stmt,
8016 tree *def, enum vect_def_type *dt, tree *vectype)
8017{
24ee1384
IR
8018 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8019 def, dt))
b690cc0f
RG
8020 return false;
8021
8022 /* Now get a vector type if the def is internal, otherwise supply
8023 NULL_TREE and leave it up to the caller to figure out a proper
8024 type for the use stmt. */
8025 if (*dt == vect_internal_def
8026 || *dt == vect_induction_def
8027 || *dt == vect_reduction_def
8028 || *dt == vect_double_reduction_def
8029 || *dt == vect_nested_cycle)
8030 {
8031 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
8032
8033 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8034 && !STMT_VINFO_RELEVANT (stmt_info)
8035 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 8036 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 8037
b690cc0f
RG
8038 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8039 gcc_assert (*vectype != NULL_TREE);
8040 }
8041 else if (*dt == vect_uninitialized_def
8042 || *dt == vect_constant_def
8043 || *dt == vect_external_def)
8044 *vectype = NULL_TREE;
8045 else
8046 gcc_unreachable ();
8047
8048 return true;
8049}
8050
ebfd146a
IR
8051
8052/* Function supportable_widening_operation
8053
b8698a0f
L
8054 Check whether an operation represented by the code CODE is a
8055 widening operation that is supported by the target platform in
b690cc0f
RG
8056 vector form (i.e., when operating on arguments of type VECTYPE_IN
8057 producing a result of type VECTYPE_OUT).
b8698a0f 8058
ebfd146a
IR
8059 Widening operations we currently support are NOP (CONVERT), FLOAT
8060 and WIDEN_MULT. This function checks if these operations are supported
8061 by the target platform either directly (via vector tree-codes), or via
8062 target builtins.
8063
8064 Output:
b8698a0f
L
8065 - CODE1 and CODE2 are codes of vector operations to be used when
8066 vectorizing the operation, if available.
ebfd146a
IR
8067 - MULTI_STEP_CVT determines the number of required intermediate steps in
8068 case of multi-step conversion (like char->short->int - in that case
8069 MULTI_STEP_CVT will be 1).
b8698a0f
L
8070 - INTERM_TYPES contains the intermediate type required to perform the
8071 widening operation (short in the above example). */
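/* E.g. a char-to-int conversion on V16QI input is done with
   VEC_UNPACK_LO/HI_EXPR in two steps: first to V8HI (recorded in
   INTERM_TYPES, with MULTI_STEP_CVT == 1) and then to V4SI.  */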
ebfd146a
IR
8072
bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore do not allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where the
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
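
  /* Added note (an assumption about the rationale, not original text):
     the LO/HI variants select halves by vector element numbering, which
     on big-endian targets runs opposite to register lane order, so the
     two codes must be swapped there.  The EVEN/ODD decomposition is
     independent of element position and needs no such adjustment.  */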
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
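      /* One widening step per iteration: check that converting PREV_MODE
         with the original optabs yields INTERMEDIATE_MODE, and that
         INTERMEDIATE_MODE can itself be widened further via OPTAB3 and
         OPTAB4.  */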
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
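
/* A usage sketch (illustrative only, hence guarded by "#if 0"): querying
   whether a char->int widening conversion is supported.  If the target
   lacks a direct one-step unpack, the function can still succeed with
   MULTI_STEP_CVT == 1 and the intermediate (short) vector type pushed
   onto INTERM_TYPES.  STMT and the two vector types are assumed to come
   from an already-analyzed loop; the function and variable names here
   are invented.  */
#if 0
static void
example_query_widening (gimple stmt, tree char_vectype, tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
                                      char_vectype, &code1, &code2,
                                      &multi_step_cvt, &interm_types))
    {
      /* CODE1/CODE2 are e.g. VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, and
         the conversion chains through MULTI_STEP_CVT intermediate
         types.  */
    }
  interm_types.release ();
}
#endif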


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      /* Use the signed variant only if its handler exists and produces
         the same result mode as the unsigned one.  */
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (interm_optab, vec_mode))
             != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
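      /* One narrowing step per iteration: check that packing PREV_MODE
         yields INTERMEDIATE_MODE, and that INTERMEDIATE_MODE can itself
         be packed further via VEC_PACK_TRUNC_EXPR.  */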
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
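
/* A usage sketch (illustrative only, hence guarded by "#if 0"): querying
   an int->char narrowing conversion.  A direct one-step pack is rarely
   available, so on many targets this succeeds with MULTI_STEP_CVT == 1
   and the intermediate (short) vector type in INTERM_TYPES.  The names
   here are invented.  */
#if 0
static void
example_query_narrowing (tree char_vectype, tree int_vectype)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
                                       &code1, &multi_step_cvt,
                                       &interm_types))
    {
      /* CODE1 is VEC_PACK_TRUNC_EXPR; the conversion chains through
         MULTI_STEP_CVT intermediate types.  */
    }
  interm_types.release ();
}
#endif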