/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
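
/* For example, vect_model_simple_cost below records the NCOPIES copies
   of a simple vector statement in the loop body with
     record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                       stmt_info, 0, vect_body);
   leaving the actual pricing to the target's cost model.  */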

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop-closed SSA form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
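      /* For masked loads and stores, the stored value (operand 3 of
         IFN_MASK_STORE) and the mask (operand 2 of both IFN_MASK_STORE
         and IFN_MASK_LOAD) are the only operands that are not used for
         address computation.  */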
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling vect_mark_relevant and passing
     it the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
              || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
            gcc_unreachable ();
          if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand into a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }
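
  /* For example, a single-step promotion (PWR == 0) is costed as
     vect_pow2 (1) == 2 vec_promote_demote operations, since each
     promotion produces two wider vectors per input vector; the
     corresponding single-step demotion is costed as vect_pow2 (0) == 1.  */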

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       enum vect_def_type dt, slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
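      /* E.g. for a group of size 4 and a single copy this counts
         1 * ceil_log2 (4) * 4 = 8 permute statements.  */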
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    /* N scalar stores plus extracting the elements.  */
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     scalar_store, stmt_info, 0, vect_body);
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
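      /* As for stores above: e.g. a group of size 4 loaded with a single
         copy counts 1 * ceil_log2 (4) * 4 = 8 permute statements.  */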
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
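
/* For example, vect_get_vec_def_for_operand below broadcasts a constant
   or invariant operand OP by calling
     vect_init_vector (stmt, op, vector_type, NULL);
   with a NULL GSI, so the initialization is placed in the loop
   preheader.  */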

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
1460
1461
ebfd146a
IR
1462/* Function vect_get_vec_def_for_stmt_copy
1463
ff802fa1 1464 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1467 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1469 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1470 DT is the type of the vector def VEC_OPRND.
1471
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1475 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1476 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1480 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1483
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1485
ebfd146a
IR
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1489 VS1.3: vx.3 = memref3
ebfd146a
IR
1490
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1495
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
b8698a0f
L
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1500 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1501 returns the vector-def 'vx.0'.
1502
b8698a0f
L
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1507
b8698a0f
L
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1517
1518tree
1519vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1520{
355fe088 1521 gimple *vec_stmt_for_operand;
ebfd146a
IR
1522 stmt_vec_info def_stmt_info;
1523
1524 /* Do nothing; can reuse same def. */
8644a673 1525 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1526 return vec_oprnd;
1527
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1538}
1539
1540
1541/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1543
c78e3652 1544void
b8698a0f 1545vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
ebfd146a 1548{
9771b263 1549 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1550
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1552 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1553
9771b263 1554 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1555 {
9771b263 1556 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1558 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1559 }
1560}
1561
1562
c78e3652 1563/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1564
c78e3652 1565void
355fe088 1566vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
306b0c92 1569 slp_tree slp_node)
ebfd146a
IR
1570{
1571 if (slp_node)
d092494c
IR
1572 {
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1576
9771b263 1577 ops.quick_push (op0);
d092494c 1578 if (op1)
9771b263 1579 ops.quick_push (op1);
d092494c 1580
306b0c92 1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1582
37b5ec8f 1583 *vec_oprnds0 = vec_defs[0];
d092494c 1584 if (op1)
37b5ec8f 1585 *vec_oprnds1 = vec_defs[1];
d092494c 1586 }
ebfd146a
IR
1587 else
1588 {
1589 tree vec_oprnd;
1590
9771b263 1591 vec_oprnds0->create (1);
81c40241 1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1593 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1594
1595 if (op1)
1596 {
9771b263 1597 vec_oprnds1->create (1);
81c40241 1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1599 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1600 }
1601 }
1602}
1603
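/* A typical caller pattern for the two routines above (a sketch only;
   OP0/OP1, DT and the copy count are whatever the caller has at hand,
   cf. the transform loop in vectorizable_bswap below):

     for (j = 0; j < ncopies; j++)
       {
	 if (j == 0)
	   vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			      slp_node);
	 else
	   vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	 ...
       }  */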
1604
1605/* Function vect_finish_stmt_generation.
1606
1607 Insert a new stmt. */
1608
1609void
355fe088 1610vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1611 gimple_stmt_iterator *gsi)
1612{
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
310213d4 1614 vec_info *vinfo = stmt_info->vinfo;
ebfd146a
IR
1615
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1617
54e8e2c3
RG
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1620 {
355fe088 1621 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1624 {
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1637 {
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1641 }
1642 }
1643 }
ebfd146a
IR
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1645
310213d4 1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
ebfd146a 1647
73fbfcad 1648 if (dump_enabled_p ())
ebfd146a 1649 {
78c60e3d
SS
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1652 }
1653
ad885386 1654 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1655
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1662}
1663
70439f0d
RS
1664/* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1668
70439f0d
RS
1669static internal_fn
1670vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
ebfd146a 1672{
70439f0d
RS
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1679 {
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1682 {
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1687 return ifn;
1688 }
1689 }
1690 return IFN_LAST;
ebfd146a
IR
1691}
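/* For example (a sketch, not tied to any particular target): a call to
   the sqrt builtin with V2DF input and output maps, via
   associated_internal_fn, to IFN_SQRT; if the target provides the
   direct optab for IFN_SQRT on V2DF, IFN_SQRT is returned, otherwise
   the caller falls back to asking for a target builtin through the
   targetm.vectorize hooks.  */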
1692
5ce9450f 1693
355fe088 1694static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1695 gimple_stmt_iterator *);
1696
62da9e14
RS
1697/* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1700
1701static int
1702compare_step_with_zero (gimple *stmt)
1703{
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
62da9e14
RS
1708}
1709
1710/* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1712
1713static tree
1714perm_mask_for_reverse (tree vectype)
1715{
1716 int i, nunits;
62da9e14
RS
1717
1718 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 1719
e3342de4 1720 vec_perm_builder sel (nunits, nunits, 1);
62da9e14 1721 for (i = 0; i < nunits; ++i)
908a1a16 1722 sel.quick_push (nunits - 1 - i);
62da9e14 1723
e3342de4
RS
1724 vec_perm_indices indices (sel, 1, nunits);
1725 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 1726 return NULL_TREE;
e3342de4 1727 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 1728}
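/* For example, for a 4-element vector the loop above builds the
   selector { 3, 2, 1, 0 }; the mask is returned only when
   can_vec_perm_const_p confirms the target can perform that constant
   permutation on VECTYPE's mode.  */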
5ce9450f 1729
2de001ee
RS
1730/* A subroutine of get_load_store_type, with a subset of the same
1731 arguments. Handle the case where STMT is part of a grouped load
1732 or store.
1733
1734 For stores, the statements in the group are all consecutive
1735 and there is no gap at the end. For loads, the statements in the
1736 group might not be consecutive; there can be gaps between statements
1737 as well as at the end. */
1738
1739static bool
1740get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1741 vec_load_store_type vls_type,
1742 vect_memory_access_type *memory_access_type)
1743{
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1746 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1747 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1748 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 1749 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
1750 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1751 bool single_element_p = (stmt == first_stmt
1752 && !GROUP_NEXT_ELEMENT (stmt_info));
1753 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
522fcdd7 1754 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1755
1756 /* True if the vectorized statements would access beyond the last
1757 statement in the group. */
1758 bool overrun_p = false;
1759
1760 /* True if we can cope with such overrun by peeling for gaps, so that
1761 there is at least one final scalar iteration after the vector loop. */
1762 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1763
1764 /* There can only be a gap at the end of the group if the stride is
1765 known at compile time. */
1766 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1767
1768 /* Stores can't yet have gaps. */
1769 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1770
1771 if (slp)
1772 {
1773 if (STMT_VINFO_STRIDED_P (stmt_info))
1774 {
1775 /* Try to use consecutive accesses of GROUP_SIZE elements,
1776 separated by the stride, until we have a complete vector.
1777 Fall back to scalar accesses if that isn't possible. */
1778 if (nunits % group_size == 0)
1779 *memory_access_type = VMAT_STRIDED_SLP;
1780 else
1781 *memory_access_type = VMAT_ELEMENTWISE;
1782 }
1783 else
1784 {
1785 overrun_p = loop_vinfo && gap != 0;
1786 if (overrun_p && vls_type != VLS_LOAD)
1787 {
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "Grouped store with gaps requires"
1790 " non-consecutive accesses\n");
1791 return false;
1792 }
f702e7d4
RS
1793 /* An overrun is fine if the trailing elements are smaller
1794 than the alignment boundary B. Every vector access will
1795 be a multiple of B and so we are guaranteed to access a
1796 non-gap element in the same B-sized block. */
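	     For example (hypothetical numbers): with 4-byte elements and
	     a known 16-byte alignment, B covers four elements, so a
	     trailing gap of up to three elements never makes a vector
	     access touch a 16-byte block that contains no real group
	     member.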
f9ef2c76 1797 if (overrun_p
f702e7d4
RS
1798 && gap < (vect_known_alignment_in_bytes (first_dr)
1799 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1800 overrun_p = false;
2de001ee
RS
1801 if (overrun_p && !can_overrun_p)
1802 {
1803 if (dump_enabled_p ())
1804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1805 "Peeling for outer loop is not supported\n");
1806 return false;
1807 }
1808 *memory_access_type = VMAT_CONTIGUOUS;
1809 }
1810 }
1811 else
1812 {
1813 /* We can always handle this case using elementwise accesses,
1814 but see if something more efficient is available. */
1815 *memory_access_type = VMAT_ELEMENTWISE;
1816
1817 /* If there is a gap at the end of the group then these optimizations
1818 would access excess elements in the last iteration. */
1819 bool would_overrun_p = (gap != 0);
f702e7d4
RS
1820 /* An overrun is fine if the trailing elements are smaller than the
1821 alignment boundary B. Every vector access will be a multiple of B
1822 and so we are guaranteed to access a non-gap element in the
1823 same B-sized block. */
f9ef2c76 1824 if (would_overrun_p
f702e7d4
RS
1825 && gap < (vect_known_alignment_in_bytes (first_dr)
1826 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1827 would_overrun_p = false;
f702e7d4 1828
2de001ee 1829 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
1830 && (can_overrun_p || !would_overrun_p)
1831 && compare_step_with_zero (stmt) > 0)
2de001ee
RS
1832 {
1833 /* First try using LOAD/STORE_LANES. */
1834 if (vls_type == VLS_LOAD
1835 ? vect_load_lanes_supported (vectype, group_size)
1836 : vect_store_lanes_supported (vectype, group_size))
1837 {
1838 *memory_access_type = VMAT_LOAD_STORE_LANES;
1839 overrun_p = would_overrun_p;
1840 }
1841
1842 /* If that fails, try using permuting loads. */
1843 if (*memory_access_type == VMAT_ELEMENTWISE
1844 && (vls_type == VLS_LOAD
1845 ? vect_grouped_load_supported (vectype, single_element_p,
1846 group_size)
1847 : vect_grouped_store_supported (vectype, group_size)))
1848 {
1849 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1850 overrun_p = would_overrun_p;
1851 }
1852 }
1853 }
1854
1855 if (vls_type != VLS_LOAD && first_stmt == stmt)
1856 {
1857 /* STMT is the leader of the group. Check the operands of all the
1858 stmts of the group. */
1859 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1860 while (next_stmt)
1861 {
1862 gcc_assert (gimple_assign_single_p (next_stmt));
1863 tree op = gimple_assign_rhs1 (next_stmt);
1864 gimple *def_stmt;
1865 enum vect_def_type dt;
1866 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1867 {
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1870 "use not simple.\n");
1871 return false;
1872 }
1873 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1874 }
1875 }
1876
1877 if (overrun_p)
1878 {
1879 gcc_assert (can_overrun_p);
1880 if (dump_enabled_p ())
1881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1882 "Data access with gaps requires scalar "
1883 "epilogue loop\n");
1884 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1885 }
1886
1887 return true;
1888}
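/* For example (a sketch): a load group with a gap at the end makes the
   last vector iteration read past the final group member; when that
   overrun is acceptable, the code above sets
   LOOP_VINFO_PEELING_FOR_GAPS so that at least one final scalar
   iteration runs after the vector loop and the overrunning vector
   access is never executed for the last elements.  */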
1889
62da9e14
RS
1890/* A subroutine of get_load_store_type, with a subset of the same
1891 arguments. Handle the case where STMT is a load or store that
1892 accesses consecutive elements with a negative step. */
1893
1894static vect_memory_access_type
1895get_negative_load_store_type (gimple *stmt, tree vectype,
1896 vec_load_store_type vls_type,
1897 unsigned int ncopies)
1898{
1899 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1900 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1901 dr_alignment_support alignment_support_scheme;
1902
1903 if (ncopies > 1)
1904 {
1905 if (dump_enabled_p ())
1906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1907 "multiple types with negative step.\n");
1908 return VMAT_ELEMENTWISE;
1909 }
1910
1911 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1912 if (alignment_support_scheme != dr_aligned
1913 && alignment_support_scheme != dr_unaligned_supported)
1914 {
1915 if (dump_enabled_p ())
1916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1917 "negative step but alignment required.\n");
1918 return VMAT_ELEMENTWISE;
1919 }
1920
1921 if (vls_type == VLS_STORE_INVARIANT)
1922 {
1923 if (dump_enabled_p ())
1924 dump_printf_loc (MSG_NOTE, vect_location,
1925 "negative step with invariant source;"
1926 " no permute needed.\n");
1927 return VMAT_CONTIGUOUS_DOWN;
1928 }
1929
1930 if (!perm_mask_for_reverse (vectype))
1931 {
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1934 "negative step and reversing not supported.\n");
1935 return VMAT_ELEMENTWISE;
1936 }
1937
1938 return VMAT_CONTIGUOUS_REVERSE;
1939}
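/* For example (a sketch): a contiguous load walking backwards, such as
   a[n - i], becomes VMAT_CONTIGUOUS_REVERSE when the target can reverse
   the vector (see perm_mask_for_reverse above): one contiguous vector
   load plus a permute.  Otherwise the checks above fall back to one
   scalar access per element, VMAT_ELEMENTWISE.  */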
1940
2de001ee
RS
1941/* Analyze load or store statement STMT of type VLS_TYPE. Return true
1942 if there is a memory access type that the vectorized form can use,
1943 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1944 or scatters, fill in GS_INFO accordingly.
1945
1946 SLP says whether we're performing SLP rather than loop vectorization.
62da9e14
RS
1947 VECTYPE is the vector type that the vectorized statements will use.
1948 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
1949
1950static bool
1951get_load_store_type (gimple *stmt, tree vectype, bool slp,
62da9e14 1952 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
1953 vect_memory_access_type *memory_access_type,
1954 gather_scatter_info *gs_info)
1955{
1956 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1957 vec_info *vinfo = stmt_info->vinfo;
1958 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1959 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1960 {
1961 *memory_access_type = VMAT_GATHER_SCATTER;
1962 gimple *def_stmt;
1963 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1964 gcc_unreachable ();
1965 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1966 &gs_info->offset_dt,
1967 &gs_info->offset_vectype))
1968 {
1969 if (dump_enabled_p ())
1970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1971 "%s index use not simple.\n",
1972 vls_type == VLS_LOAD ? "gather" : "scatter");
1973 return false;
1974 }
1975 }
1976 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1977 {
1978 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1979 memory_access_type))
1980 return false;
1981 }
1982 else if (STMT_VINFO_STRIDED_P (stmt_info))
1983 {
1984 gcc_assert (!slp);
1985 *memory_access_type = VMAT_ELEMENTWISE;
1986 }
1987 else
62da9e14
RS
1988 {
1989 int cmp = compare_step_with_zero (stmt);
1990 if (cmp < 0)
1991 *memory_access_type = get_negative_load_store_type
1992 (stmt, vectype, vls_type, ncopies);
1993 else if (cmp == 0)
1994 {
1995 gcc_assert (vls_type == VLS_LOAD);
1996 *memory_access_type = VMAT_INVARIANT;
1997 }
1998 else
1999 *memory_access_type = VMAT_CONTIGUOUS;
2000 }
2de001ee
RS
2001
2002 /* FIXME: At the moment the cost model seems to underestimate the
2003 cost of using elementwise accesses. This check preserves the
2004 traditional behavior until that can be fixed. */
2005 if (*memory_access_type == VMAT_ELEMENTWISE
2006 && !STMT_VINFO_STRIDED_P (stmt_info))
2007 {
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2010 "not falling back to elementwise accesses\n");
2011 return false;
2012 }
2013 return true;
2014}
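/* For example (a sketch): a non-grouped, non-strided load with step 0
   (an invariant address) is classified VMAT_INVARIANT, a positive step
   VMAT_CONTIGUOUS, and a negative step is classified by
   get_negative_load_store_type above.  */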
2015
5ce9450f
JJ
2016/* Function vectorizable_mask_load_store.
2017
2018 Check if STMT performs a conditional load or store that can be vectorized.
2019 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2020 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2021 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2022
2023static bool
355fe088
TS
2024vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2025 gimple **vec_stmt, slp_tree slp_node)
5ce9450f
JJ
2026{
2027 tree vec_dest = NULL;
2028 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2029 stmt_vec_info prev_stmt_info;
2030 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2031 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2032 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2033 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2034 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
57e2f6ad 2035 tree rhs_vectype = NULL_TREE;
045c1278 2036 tree mask_vectype;
5ce9450f 2037 tree elem_type;
355fe088 2038 gimple *new_stmt;
5ce9450f
JJ
2039 tree dummy;
2040 tree dataref_ptr = NULL_TREE;
355fe088 2041 gimple *ptr_incr;
5ce9450f
JJ
2042 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2043 int ncopies;
2044 int i, j;
2045 bool inv_p;
134c85ca 2046 gather_scatter_info gs_info;
2de001ee 2047 vec_load_store_type vls_type;
5ce9450f 2048 tree mask;
355fe088 2049 gimple *def_stmt;
5ce9450f
JJ
2050 enum vect_def_type dt;
2051
2052 if (slp_node != NULL)
2053 return false;
2054
e8f142e2 2055 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5ce9450f
JJ
2056 gcc_assert (ncopies >= 1);
2057
5ce9450f 2058 mask = gimple_call_arg (stmt, 2);
045c1278 2059
2568d8a1 2060 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
5ce9450f
JJ
2061 return false;
2062
2063 /* FORNOW. This restriction should be relaxed. */
2064 if (nested_in_vect_loop && ncopies > 1)
2065 {
2066 if (dump_enabled_p ())
2067 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2068 "multiple types in nested loop.");
2069 return false;
2070 }
2071
2072 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2073 return false;
2074
66c16fd9
RB
2075 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2076 && ! vec_stmt)
5ce9450f
JJ
2077 return false;
2078
2079 if (!STMT_VINFO_DATA_REF (stmt_info))
2080 return false;
2081
2082 elem_type = TREE_TYPE (vectype);
2083
045c1278
IE
2084 if (TREE_CODE (mask) != SSA_NAME)
2085 return false;
2086
2087 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2088 return false;
2089
2090 if (!mask_vectype)
2091 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2092
dc6a3147
IE
2093 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2094 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
045c1278
IE
2095 return false;
2096
2de001ee 2097 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
57e2f6ad
IE
2098 {
2099 tree rhs = gimple_call_arg (stmt, 3);
2100 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2101 return false;
2de001ee
RS
2102 if (dt == vect_constant_def || dt == vect_external_def)
2103 vls_type = VLS_STORE_INVARIANT;
2104 else
2105 vls_type = VLS_STORE;
57e2f6ad 2106 }
2de001ee
RS
2107 else
2108 vls_type = VLS_LOAD;
57e2f6ad 2109
2de001ee 2110 vect_memory_access_type memory_access_type;
62da9e14 2111 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2de001ee
RS
2112 &memory_access_type, &gs_info))
2113 return false;
03b9e8e4 2114
2de001ee
RS
2115 if (memory_access_type == VMAT_GATHER_SCATTER)
2116 {
134c85ca 2117 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
03b9e8e4
JJ
2118 tree masktype
2119 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2120 if (TREE_CODE (masktype) == INTEGER_TYPE)
2121 {
2122 if (dump_enabled_p ())
2123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2124 "masked gather with integer mask not supported.");
2125 return false;
2126 }
5ce9450f 2127 }
2de001ee
RS
2128 else if (memory_access_type != VMAT_CONTIGUOUS)
2129 {
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2132 "unsupported access type for masked %s.\n",
2133 vls_type == VLS_LOAD ? "load" : "store");
2134 return false;
2135 }
5ce9450f 2136 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
045c1278
IE
2137 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2138 TYPE_MODE (mask_vectype),
2de001ee 2139 vls_type == VLS_LOAD)
57e2f6ad
IE
2140 || (rhs_vectype
2141 && !useless_type_conversion_p (vectype, rhs_vectype)))
5ce9450f
JJ
2142 return false;
2143
5ce9450f
JJ
2144 if (!vec_stmt) /* transformation not required. */
2145 {
2de001ee 2146 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5ce9450f 2147 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2de001ee
RS
2148 if (vls_type == VLS_LOAD)
2149 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2150 NULL, NULL, NULL);
5ce9450f 2151 else
2de001ee
RS
2152 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2153 dt, NULL, NULL, NULL);
5ce9450f
JJ
2154 return true;
2155 }
2de001ee 2156 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5ce9450f 2157
67b8dbac 2158 /* Transform. */
5ce9450f 2159
2de001ee 2160 if (memory_access_type == VMAT_GATHER_SCATTER)
5ce9450f
JJ
2161 {
2162 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 2163 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5ce9450f 2164 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 2165 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 2166 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 2167 tree mask_perm_mask = NULL_TREE;
5ce9450f
JJ
2168 edge pe = loop_preheader_edge (loop);
2169 gimple_seq seq;
2170 basic_block new_bb;
2171 enum { NARROW, NONE, WIDEN } modifier;
134c85ca 2172 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5ce9450f 2173
134c85ca 2174 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
acdcd61b
JJ
2175 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2176 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2178 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2179 scaletype = TREE_VALUE (arglist);
2180 gcc_checking_assert (types_compatible_p (srctype, rettype)
2181 && types_compatible_p (srctype, masktype));
2182
5ce9450f
JJ
2183 if (nunits == gather_off_nunits)
2184 modifier = NONE;
2185 else if (nunits == gather_off_nunits / 2)
2186 {
5ce9450f
JJ
2187 modifier = WIDEN;
2188
e3342de4 2189 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
5ce9450f 2190 for (i = 0; i < gather_off_nunits; ++i)
908a1a16 2191 sel.quick_push (i | nunits);
5ce9450f 2192
e3342de4
RS
2193 vec_perm_indices indices (sel, 1, gather_off_nunits);
2194 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2195 indices);
5ce9450f
JJ
2196 }
2197 else if (nunits == gather_off_nunits * 2)
2198 {
5ce9450f
JJ
2199 modifier = NARROW;
2200
e3342de4 2201 vec_perm_builder sel (nunits, nunits, 1);
908a1a16 2202 sel.quick_grow (nunits);
5ce9450f
JJ
2203 for (i = 0; i < nunits; ++i)
2204 sel[i] = i < gather_off_nunits
2205 ? i : i + nunits - gather_off_nunits;
e3342de4
RS
2206 vec_perm_indices indices (sel, 2, nunits);
2207 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5ce9450f 2208
5ce9450f 2209 ncopies *= 2;
e3342de4 2210
acdcd61b
JJ
2211 for (i = 0; i < nunits; ++i)
2212 sel[i] = i | gather_off_nunits;
e3342de4
RS
2213 indices.new_vector (sel, 2, gather_off_nunits);
2214 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
5ce9450f
JJ
2215 }
2216 else
2217 gcc_unreachable ();
2218
5ce9450f
JJ
2219 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2220
134c85ca 2221 ptr = fold_convert (ptrtype, gs_info.base);
5ce9450f
JJ
2222 if (!is_gimple_min_invariant (ptr))
2223 {
2224 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2225 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2226 gcc_assert (!new_bb);
2227 }
2228
134c85ca 2229 scale = build_int_cst (scaletype, gs_info.scale);
5ce9450f
JJ
2230
2231 prev_stmt_info = NULL;
2232 for (j = 0; j < ncopies; ++j)
2233 {
2234 if (modifier == WIDEN && (j & 1))
2235 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2236 perm_mask, stmt, gsi);
2237 else if (j == 0)
2238 op = vec_oprnd0
134c85ca 2239 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5ce9450f
JJ
2240 else
2241 op = vec_oprnd0
134c85ca 2242 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
5ce9450f
JJ
2243
2244 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2245 {
2246 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2247 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 2248 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5ce9450f
JJ
2249 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2250 new_stmt
0d0e4a03 2251 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2252 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2253 op = var;
2254 }
2255
acdcd61b
JJ
2256 if (mask_perm_mask && (j & 1))
2257 mask_op = permute_vec_elements (mask_op, mask_op,
2258 mask_perm_mask, stmt, gsi);
5ce9450f
JJ
2259 else
2260 {
acdcd61b 2261 if (j == 0)
81c40241 2262 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
acdcd61b
JJ
2263 else
2264 {
81c40241 2265 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
acdcd61b
JJ
2266 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2267 }
5ce9450f 2268
acdcd61b
JJ
2269 mask_op = vec_mask;
2270 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2271 {
2272 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2273 == TYPE_VECTOR_SUBPARTS (masktype));
0e22bb5a 2274 var = vect_get_new_ssa_name (masktype, vect_simple_var);
acdcd61b
JJ
2275 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2276 new_stmt
0d0e4a03 2277 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
acdcd61b
JJ
2278 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2279 mask_op = var;
2280 }
5ce9450f
JJ
2281 }
2282
2283 new_stmt
134c85ca 2284 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
5ce9450f
JJ
2285 scale);
2286
2287 if (!useless_type_conversion_p (vectype, rettype))
2288 {
2289 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2290 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 2291 op = vect_get_new_ssa_name (rettype, vect_simple_var);
5ce9450f
JJ
2292 gimple_call_set_lhs (new_stmt, op);
2293 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 2294 var = make_ssa_name (vec_dest);
5ce9450f 2295 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
0d0e4a03 2296 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2297 }
2298 else
2299 {
2300 var = make_ssa_name (vec_dest, new_stmt);
2301 gimple_call_set_lhs (new_stmt, var);
2302 }
2303
2304 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2305
2306 if (modifier == NARROW)
2307 {
2308 if ((j & 1) == 0)
2309 {
2310 prev_res = var;
2311 continue;
2312 }
2313 var = permute_vec_elements (prev_res, var,
2314 perm_mask, stmt, gsi);
2315 new_stmt = SSA_NAME_DEF_STMT (var);
2316 }
2317
2318 if (prev_stmt_info == NULL)
2319 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2320 else
2321 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2322 prev_stmt_info = vinfo_for_stmt (new_stmt);
2323 }
3efe2e2c
JJ
2324
2325 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2326 from the IL. */
e6f5c25d
IE
2327 if (STMT_VINFO_RELATED_STMT (stmt_info))
2328 {
2329 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2330 stmt_info = vinfo_for_stmt (stmt);
2331 }
3efe2e2c
JJ
2332 tree lhs = gimple_call_lhs (stmt);
2333 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2334 set_vinfo_for_stmt (new_stmt, stmt_info);
2335 set_vinfo_for_stmt (stmt, NULL);
2336 STMT_VINFO_STMT (stmt_info) = new_stmt;
2337 gsi_replace (gsi, new_stmt, true);
5ce9450f
JJ
2338 return true;
2339 }
2de001ee 2340 else if (vls_type != VLS_LOAD)
5ce9450f
JJ
2341 {
2342 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2343 prev_stmt_info = NULL;
2d4dc223 2344 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
5ce9450f
JJ
2345 for (i = 0; i < ncopies; i++)
2346 {
2347 unsigned align, misalign;
2348
2349 if (i == 0)
2350 {
2351 tree rhs = gimple_call_arg (stmt, 3);
81c40241 2352 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
7251b0bf
RS
2353 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2354 mask_vectype);
5ce9450f
JJ
2355 /* We should have caught mismatched types earlier. */
2356 gcc_assert (useless_type_conversion_p (vectype,
2357 TREE_TYPE (vec_rhs)));
2358 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2359 NULL_TREE, &dummy, gsi,
2360 &ptr_incr, false, &inv_p);
2361 gcc_assert (!inv_p);
2362 }
2363 else
2364 {
81c40241 2365 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
5ce9450f 2366 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
81c40241 2367 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2368 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2369 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2370 TYPE_SIZE_UNIT (vectype));
2371 }
2372
f702e7d4 2373 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2374 if (aligned_access_p (dr))
2375 misalign = 0;
2376 else if (DR_MISALIGNMENT (dr) == -1)
2377 {
2378 align = TYPE_ALIGN_UNIT (elem_type);
2379 misalign = 0;
2380 }
2381 else
2382 misalign = DR_MISALIGNMENT (dr);
2383 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2384 misalign);
08554c26 2385 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2386 misalign ? least_bit_hwi (misalign) : align);
a844293d 2387 gcall *call
5ce9450f 2388 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
08554c26 2389 ptr, vec_mask, vec_rhs);
a844293d
RS
2390 gimple_call_set_nothrow (call, true);
2391 new_stmt = call;
5ce9450f
JJ
2392 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2393 if (i == 0)
2394 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2395 else
2396 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2397 prev_stmt_info = vinfo_for_stmt (new_stmt);
2398 }
2399 }
2400 else
2401 {
2402 tree vec_mask = NULL_TREE;
2403 prev_stmt_info = NULL;
2404 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2405 for (i = 0; i < ncopies; i++)
2406 {
2407 unsigned align, misalign;
2408
2409 if (i == 0)
2410 {
7251b0bf
RS
2411 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2412 mask_vectype);
5ce9450f
JJ
2413 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2414 NULL_TREE, &dummy, gsi,
2415 &ptr_incr, false, &inv_p);
2416 gcc_assert (!inv_p);
2417 }
2418 else
2419 {
81c40241 2420 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2421 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2422 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2423 TYPE_SIZE_UNIT (vectype));
2424 }
2425
f702e7d4 2426 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2427 if (aligned_access_p (dr))
2428 misalign = 0;
2429 else if (DR_MISALIGNMENT (dr) == -1)
2430 {
2431 align = TYPE_ALIGN_UNIT (elem_type);
2432 misalign = 0;
2433 }
2434 else
2435 misalign = DR_MISALIGNMENT (dr);
2436 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2437 misalign);
08554c26 2438 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2439 misalign ? least_bit_hwi (misalign) : align);
a844293d 2440 gcall *call
5ce9450f 2441 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
08554c26 2442 ptr, vec_mask);
a844293d
RS
2443 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2444 gimple_call_set_nothrow (call, true);
2445 vect_finish_stmt_generation (stmt, call, gsi);
5ce9450f 2446 if (i == 0)
a844293d 2447 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
5ce9450f 2448 else
a844293d
RS
2449 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2450 prev_stmt_info = vinfo_for_stmt (call);
5ce9450f
JJ
2451 }
2452 }
2453
2de001ee 2454 if (vls_type == VLS_LOAD)
3efe2e2c
JJ
2455 {
2456 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2457 from the IL. */
e6f5c25d
IE
2458 if (STMT_VINFO_RELATED_STMT (stmt_info))
2459 {
2460 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2461 stmt_info = vinfo_for_stmt (stmt);
2462 }
3efe2e2c
JJ
2463 tree lhs = gimple_call_lhs (stmt);
2464 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2465 set_vinfo_for_stmt (new_stmt, stmt_info);
2466 set_vinfo_for_stmt (stmt, NULL);
2467 STMT_VINFO_STMT (stmt_info) = new_stmt;
2468 gsi_replace (gsi, new_stmt, true);
2469 }
2470
5ce9450f
JJ
2471 return true;
2472}
2473
37b14185
RB
2474/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2475
2476static bool
2477vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2478 gimple **vec_stmt, slp_tree slp_node,
2479 tree vectype_in, enum vect_def_type *dt)
2480{
2481 tree op, vectype;
2482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2484 unsigned ncopies, nunits;
2485
2486 op = gimple_call_arg (stmt, 0);
2487 vectype = STMT_VINFO_VECTYPE (stmt_info);
2488 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2489
2490 /* Multiple types in SLP are handled by creating the appropriate number of
2491 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2492 case of SLP. */
2493 if (slp_node)
2494 ncopies = 1;
2495 else
e8f142e2 2496 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2497
2498 gcc_assert (ncopies >= 1);
2499
2500 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2501 if (! char_vectype)
2502 return false;
2503
794e3180 2504 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
794e3180 2505 unsigned word_bytes = num_bytes / nunits;
908a1a16 2506
e3342de4 2507 vec_perm_builder elts (num_bytes, num_bytes, 1);
37b14185
RB
2508 for (unsigned i = 0; i < nunits; ++i)
2509 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2510 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2511
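  /* For example (a sketch): BUILT_IN_BSWAP32 on a vector of four 32-bit
     words gives nunits == 4, num_bytes == 16 and word_bytes == 4, so the
     loop above builds the byte selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */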
e3342de4
RS
2512 vec_perm_indices indices (elts, 1, num_bytes);
2513 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2514 return false;
2515
2516 if (! vec_stmt)
2517 {
2518 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2519 if (dump_enabled_p ())
2520 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2521 "\n");
2522 if (! PURE_SLP_STMT (stmt_info))
2523 {
2524 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2525 1, vector_stmt, stmt_info, 0, vect_prologue);
2526 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2527 ncopies, vec_perm, stmt_info, 0, vect_body);
2528 }
2529 return true;
2530 }
2531
5ebaa477 2532 tree_vector_builder telts (char_vectype, num_bytes, 1);
794e3180
RS
2533 for (unsigned i = 0; i < num_bytes; ++i)
2534 telts.quick_push (build_int_cst (char_type_node, elts[i]));
5ebaa477 2535 tree bswap_vconst = telts.build ();
37b14185
RB
2536
2537 /* Transform. */
2538 vec<tree> vec_oprnds = vNULL;
2539 gimple *new_stmt = NULL;
2540 stmt_vec_info prev_stmt_info = NULL;
2541 for (unsigned j = 0; j < ncopies; j++)
2542 {
2543 /* Handle uses. */
2544 if (j == 0)
306b0c92 2545 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2546 else
2547 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2548
2549 /* Arguments are ready.  Create the new vector stmt. */
2550 unsigned i;
2551 tree vop;
2552 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2553 {
2554 tree tem = make_ssa_name (char_vectype);
2555 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2556 char_vectype, vop));
2557 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2558 tree tem2 = make_ssa_name (char_vectype);
2559 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2560 tem, tem, bswap_vconst);
2561 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2562 tem = make_ssa_name (vectype);
2563 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2564 vectype, tem2));
2565 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2566 if (slp_node)
2567 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2568 }
2569
2570 if (slp_node)
2571 continue;
2572
2573 if (j == 0)
2574 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2575 else
2576 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2577
2578 prev_stmt_info = vinfo_for_stmt (new_stmt);
2579 }
2580
2581 vec_oprnds.release ();
2582 return true;
2583}
2584
b1b6836e
RS
2585/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2586 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2587 in a single step. On success, store the binary pack code in
2588 *CONVERT_CODE. */
2589
2590static bool
2591simple_integer_narrowing (tree vectype_out, tree vectype_in,
2592 tree_code *convert_code)
2593{
2594 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2595 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2596 return false;
2597
2598 tree_code code;
2599 int multi_step_cvt = 0;
2600 auto_vec <tree, 8> interm_types;
2601 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2602 &code, &multi_step_cvt,
2603 &interm_types)
2604 || multi_step_cvt)
2605 return false;
2606
2607 *convert_code = code;
2608 return true;
2609}
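/* For example (a sketch; whether a given pack exists depends on the
   target): narrowing a vector of 32-bit integers to one of 16-bit
   integers can typically be done with a single pack operation, so the
   binary pack code is stored in *CONVERT_CODE; going from 32-bit to
   8-bit would need two steps (MULTI_STEP_CVT != 0) and is rejected.  */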
5ce9450f 2610
ebfd146a
IR
2611/* Function vectorizable_call.
2612
538dd0b7 2613 Check if GS performs a function call that can be vectorized.
b8698a0f 2614 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2615 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2616 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2617
2618static bool
355fe088 2619vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2620 slp_tree slp_node)
ebfd146a 2621{
538dd0b7 2622 gcall *stmt;
ebfd146a
IR
2623 tree vec_dest;
2624 tree scalar_dest;
2625 tree op, type;
2626 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2627 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a
IR
2628 tree vectype_out, vectype_in;
2629 int nunits_in;
2630 int nunits_out;
2631 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2632 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 2633 vec_info *vinfo = stmt_info->vinfo;
81c40241 2634 tree fndecl, new_temp, rhs_type;
355fe088 2635 gimple *def_stmt;
0502fb85
UB
2636 enum vect_def_type dt[3]
2637 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 2638 int ndts = 3;
355fe088 2639 gimple *new_stmt = NULL;
ebfd146a 2640 int ncopies, j;
6e1aa848 2641 vec<tree> vargs = vNULL;
ebfd146a
IR
2642 enum { NARROW, NONE, WIDEN } modifier;
2643 size_t i, nargs;
9d5e7640 2644 tree lhs;
ebfd146a 2645
190c2236 2646 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2647 return false;
2648
66c16fd9
RB
2649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2650 && ! vec_stmt)
ebfd146a
IR
2651 return false;
2652
538dd0b7
DM
2653 /* Is GS a vectorizable call? */
2654 stmt = dyn_cast <gcall *> (gs);
2655 if (!stmt)
ebfd146a
IR
2656 return false;
2657
5ce9450f
JJ
2658 if (gimple_call_internal_p (stmt)
2659 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2660 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2661 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2662 slp_node);
2663
0136f8f0
AH
2664 if (gimple_call_lhs (stmt) == NULL_TREE
2665 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2666 return false;
2667
0136f8f0 2668 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2669
b690cc0f
RG
2670 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2671
ebfd146a
IR
2672 /* Process function arguments. */
2673 rhs_type = NULL_TREE;
b690cc0f 2674 vectype_in = NULL_TREE;
ebfd146a
IR
2675 nargs = gimple_call_num_args (stmt);
2676
1b1562a5
MM
2677 /* Bail out if the function has more than three arguments; we do not have
2678 interesting builtin functions to vectorize with more than two arguments
2679 except for fma. Calls with no arguments are not handled either. */
2680 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2681 return false;
2682
74bf76ed
JJ
2683 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2684 if (gimple_call_internal_p (stmt)
2685 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2686 {
2687 nargs = 0;
2688 rhs_type = unsigned_type_node;
2689 }
2690
ebfd146a
IR
2691 for (i = 0; i < nargs; i++)
2692 {
b690cc0f
RG
2693 tree opvectype;
2694
ebfd146a
IR
2695 op = gimple_call_arg (stmt, i);
2696
2697 /* We can only handle calls with arguments of the same type. */
2698 if (rhs_type
8533c9d8 2699 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2700 {
73fbfcad 2701 if (dump_enabled_p ())
78c60e3d 2702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2703 "argument types differ.\n");
ebfd146a
IR
2704 return false;
2705 }
b690cc0f
RG
2706 if (!rhs_type)
2707 rhs_type = TREE_TYPE (op);
ebfd146a 2708
81c40241 2709 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 2710 {
73fbfcad 2711 if (dump_enabled_p ())
78c60e3d 2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2713 "use not simple.\n");
ebfd146a
IR
2714 return false;
2715 }
ebfd146a 2716
b690cc0f
RG
2717 if (!vectype_in)
2718 vectype_in = opvectype;
2719 else if (opvectype
2720 && opvectype != vectype_in)
2721 {
73fbfcad 2722 if (dump_enabled_p ())
78c60e3d 2723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2724 "argument vector types differ.\n");
b690cc0f
RG
2725 return false;
2726 }
2727 }
2728 /* If all arguments are external or constant defs use a vector type with
2729 the same size as the output vector type. */
ebfd146a 2730 if (!vectype_in)
b690cc0f 2731 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2732 if (vec_stmt)
2733 gcc_assert (vectype_in);
2734 if (!vectype_in)
2735 {
73fbfcad 2736 if (dump_enabled_p ())
7d8930a0 2737 {
78c60e3d
SS
2738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2739 "no vectype for scalar type ");
2740 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2741 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2742 }
2743
2744 return false;
2745 }
ebfd146a
IR
2746
2747 /* FORNOW */
b690cc0f
RG
2748 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2749 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
2750 if (nunits_in == nunits_out / 2)
2751 modifier = NARROW;
2752 else if (nunits_out == nunits_in)
2753 modifier = NONE;
2754 else if (nunits_out == nunits_in / 2)
2755 modifier = WIDEN;
2756 else
2757 return false;
2758
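  /* For example (a sketch): V4SI arguments producing V8HI results give
     nunits_in == 4 and nunits_out == 8, hence NARROW; the opposite
     ratio gives WIDEN; any ratio other than 1:1, 2:1 or 1:2 is not
     handled.  */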
70439f0d
RS
2759 /* We only handle functions that do not read or clobber memory. */
2760 if (gimple_vuse (stmt))
2761 {
2762 if (dump_enabled_p ())
2763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2764 "function reads from or writes to memory.\n");
2765 return false;
2766 }
2767
ebfd146a
IR
2768 /* For now, we only vectorize functions if a target-specific builtin
2769 is available. TODO -- in some cases, it might be profitable to
2770 insert the calls for pieces of the vector, in order to be able
2771 to vectorize other operations in the loop. */
70439f0d
RS
2772 fndecl = NULL_TREE;
2773 internal_fn ifn = IFN_LAST;
2774 combined_fn cfn = gimple_call_combined_fn (stmt);
2775 tree callee = gimple_call_fndecl (stmt);
2776
2777 /* First try using an internal function. */
b1b6836e
RS
2778 tree_code convert_code = ERROR_MARK;
2779 if (cfn != CFN_LAST
2780 && (modifier == NONE
2781 || (modifier == NARROW
2782 && simple_integer_narrowing (vectype_out, vectype_in,
2783 &convert_code))))
70439f0d
RS
2784 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2785 vectype_in);
2786
2787 /* If that fails, try asking for a target-specific built-in function. */
2788 if (ifn == IFN_LAST)
2789 {
2790 if (cfn != CFN_LAST)
2791 fndecl = targetm.vectorize.builtin_vectorized_function
2792 (cfn, vectype_out, vectype_in);
2793 else
2794 fndecl = targetm.vectorize.builtin_md_vectorized_function
2795 (callee, vectype_out, vectype_in);
2796 }
2797
2798 if (ifn == IFN_LAST && !fndecl)
ebfd146a 2799 {
70439f0d 2800 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
2801 && !slp_node
2802 && loop_vinfo
2803 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2804 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2805 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2806 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2807 {
2808 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2809 { 0, 1, 2, ... vf - 1 } vector. */
2810 gcc_assert (nargs == 0);
2811 }
37b14185
RB
2812 else if (modifier == NONE
2813 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2814 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2815 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2816 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2817 vectype_in, dt);
74bf76ed
JJ
2818 else
2819 {
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2822 "function is not vectorizable.\n");
74bf76ed
JJ
2823 return false;
2824 }
ebfd146a
IR
2825 }
2826
fce57248 2827 if (slp_node)
190c2236 2828 ncopies = 1;
b1b6836e 2829 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 2830 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 2831 else
e8f142e2 2832 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
2833
2834 /* Sanity check: make sure that at least one copy of the vectorized stmt
2835 needs to be generated. */
2836 gcc_assert (ncopies >= 1);
2837
2838 if (!vec_stmt) /* transformation not required. */
2839 {
2840 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2841 if (dump_enabled_p ())
e645e942
TJ
2842 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2843 "\n");
4fc5ebf1 2844 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
b1b6836e
RS
2845 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2846 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2847 vec_promote_demote, stmt_info, 0, vect_body);
2848
ebfd146a
IR
2849 return true;
2850 }
2851
67b8dbac 2852 /* Transform. */
ebfd146a 2853
73fbfcad 2854 if (dump_enabled_p ())
e645e942 2855 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2856
2857 /* Handle def. */
2858 scalar_dest = gimple_call_lhs (stmt);
2859 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2860
2861 prev_stmt_info = NULL;
b1b6836e 2862 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 2863 {
b1b6836e 2864 tree prev_res = NULL_TREE;
ebfd146a
IR
2865 for (j = 0; j < ncopies; ++j)
2866 {
2867 /* Build argument list for the vectorized call. */
2868 if (j == 0)
9771b263 2869 vargs.create (nargs);
ebfd146a 2870 else
9771b263 2871 vargs.truncate (0);
ebfd146a 2872
190c2236
JJ
2873 if (slp_node)
2874 {
ef062b13 2875 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2876 vec<tree> vec_oprnds0;
190c2236
JJ
2877
2878 for (i = 0; i < nargs; i++)
9771b263 2879 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2880 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2881 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2882
2883 /* Arguments are ready. Create the new vector stmt. */
9771b263 2884 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2885 {
2886 size_t k;
2887 for (k = 0; k < nargs; k++)
2888 {
37b5ec8f 2889 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2890 vargs[k] = vec_oprndsk[i];
190c2236 2891 }
b1b6836e
RS
2892 if (modifier == NARROW)
2893 {
2894 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2895 gcall *call
2896 = gimple_build_call_internal_vec (ifn, vargs);
2897 gimple_call_set_lhs (call, half_res);
2898 gimple_call_set_nothrow (call, true);
2899 new_stmt = call;
b1b6836e
RS
2900 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2901 if ((i & 1) == 0)
2902 {
2903 prev_res = half_res;
2904 continue;
2905 }
2906 new_temp = make_ssa_name (vec_dest);
2907 new_stmt = gimple_build_assign (new_temp, convert_code,
2908 prev_res, half_res);
2909 }
70439f0d 2910 else
b1b6836e 2911 {
a844293d 2912 gcall *call;
b1b6836e 2913 if (ifn != IFN_LAST)
a844293d 2914 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 2915 else
a844293d
RS
2916 call = gimple_build_call_vec (fndecl, vargs);
2917 new_temp = make_ssa_name (vec_dest, call);
2918 gimple_call_set_lhs (call, new_temp);
2919 gimple_call_set_nothrow (call, true);
2920 new_stmt = call;
b1b6836e 2921 }
190c2236 2922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2923 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2924 }
2925
2926 for (i = 0; i < nargs; i++)
2927 {
37b5ec8f 2928 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2929 vec_oprndsi.release ();
190c2236 2930 }
190c2236
JJ
2931 continue;
2932 }
2933
ebfd146a
IR
2934 for (i = 0; i < nargs; i++)
2935 {
2936 op = gimple_call_arg (stmt, i);
2937 if (j == 0)
2938 vec_oprnd0
81c40241 2939 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 2940 else
63827fb8
IR
2941 {
2942 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2943 vec_oprnd0
2944 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2945 }
ebfd146a 2946
9771b263 2947 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2948 }
2949
74bf76ed
JJ
2950 if (gimple_call_internal_p (stmt)
2951 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2952 {
5ebaa477
RS
2953 tree_vector_builder v (vectype_out, 1, 3);
2954 for (int k = 0; k < 3; ++k)
794e3180
RS
2955 v.quick_push (build_int_cst (unsigned_type_node,
2956 j * nunits_out + k));
5ebaa477 2957 tree cst = v.build ();
74bf76ed 2958 tree new_var
0e22bb5a 2959 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 2960 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 2961 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2962 new_temp = make_ssa_name (vec_dest);
0e22bb5a 2963 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 2964 }
b1b6836e
RS
2965 else if (modifier == NARROW)
2966 {
2967 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2968 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2969 gimple_call_set_lhs (call, half_res);
2970 gimple_call_set_nothrow (call, true);
2971 new_stmt = call;
b1b6836e
RS
2972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2973 if ((j & 1) == 0)
2974 {
2975 prev_res = half_res;
2976 continue;
2977 }
2978 new_temp = make_ssa_name (vec_dest);
2979 new_stmt = gimple_build_assign (new_temp, convert_code,
2980 prev_res, half_res);
2981 }
74bf76ed
JJ
2982 else
2983 {
a844293d 2984 gcall *call;
70439f0d 2985 if (ifn != IFN_LAST)
a844293d 2986 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 2987 else
a844293d 2988 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 2989 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
2990 gimple_call_set_lhs (call, new_temp);
2991 gimple_call_set_nothrow (call, true);
2992 new_stmt = call;
74bf76ed 2993 }
ebfd146a
IR
2994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2995
b1b6836e 2996 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
2997 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2998 else
2999 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3000
3001 prev_stmt_info = vinfo_for_stmt (new_stmt);
3002 }
b1b6836e
RS
3003 }
3004 else if (modifier == NARROW)
3005 {
ebfd146a
IR
3006 for (j = 0; j < ncopies; ++j)
3007 {
3008 /* Build argument list for the vectorized call. */
3009 if (j == 0)
9771b263 3010 vargs.create (nargs * 2);
ebfd146a 3011 else
9771b263 3012 vargs.truncate (0);
ebfd146a 3013
190c2236
JJ
3014 if (slp_node)
3015 {
ef062b13 3016 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3017 vec<tree> vec_oprnds0;
190c2236
JJ
3018
3019 for (i = 0; i < nargs; i++)
9771b263 3020 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3021 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3022 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3023
3024 /* Arguments are ready. Create the new vector stmt. */
9771b263 3025 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3026 {
3027 size_t k;
9771b263 3028 vargs.truncate (0);
190c2236
JJ
3029 for (k = 0; k < nargs; k++)
3030 {
37b5ec8f 3031 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3032 vargs.quick_push (vec_oprndsk[i]);
3033 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3034 }
a844293d 3035 gcall *call;
70439f0d 3036 if (ifn != IFN_LAST)
a844293d 3037 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3038 else
a844293d
RS
3039 call = gimple_build_call_vec (fndecl, vargs);
3040 new_temp = make_ssa_name (vec_dest, call);
3041 gimple_call_set_lhs (call, new_temp);
3042 gimple_call_set_nothrow (call, true);
3043 new_stmt = call;
190c2236 3044 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3045 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3046 }
3047
3048 for (i = 0; i < nargs; i++)
3049 {
37b5ec8f 3050 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3051 vec_oprndsi.release ();
190c2236 3052 }
190c2236
JJ
3053 continue;
3054 }
3055
ebfd146a
IR
3056 for (i = 0; i < nargs; i++)
3057 {
3058 op = gimple_call_arg (stmt, i);
3059 if (j == 0)
3060 {
3061 vec_oprnd0
81c40241 3062 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3063 vec_oprnd1
63827fb8 3064 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3065 }
3066 else
3067 {
336ecb65 3068 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3069 vec_oprnd0
63827fb8 3070 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3071 vec_oprnd1
63827fb8 3072 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3073 }
3074
9771b263
DN
3075 vargs.quick_push (vec_oprnd0);
3076 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3077 }
3078
b1b6836e 3079 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3080 new_temp = make_ssa_name (vec_dest, new_stmt);
3081 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3083
3084 if (j == 0)
3085 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3086 else
3087 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3088
3089 prev_stmt_info = vinfo_for_stmt (new_stmt);
3090 }
3091
3092 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3093 }
b1b6836e
RS
3094 else
3095 /* No current target implements this case. */
3096 return false;
ebfd146a 3097
9771b263 3098 vargs.release ();
ebfd146a 3099
ebfd146a
IR
3100 /* The call in STMT might prevent it from being removed in DCE.
3101 We cannot remove it here, however, because of the way the SSA name
3102 it defines is mapped to the new definition. So just replace the
3103 rhs of the statement with something harmless. */
3104
dd34c087
JJ
3105 if (slp_node)
3106 return true;
3107
ebfd146a 3108 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3109 if (is_pattern_stmt_p (stmt_info))
3110 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3111 else
3112 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3113
9d5e7640 3114 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3115 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3116 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3117 STMT_VINFO_STMT (stmt_info) = new_stmt;
3118 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3119
3120 return true;
3121}
3122
3123
0136f8f0
AH
3124struct simd_call_arg_info
3125{
3126 tree vectype;
3127 tree op;
0136f8f0 3128 HOST_WIDE_INT linear_step;
34e82342 3129 enum vect_def_type dt;
0136f8f0 3130 unsigned int align;
17b658af 3131 bool simd_lane_linear;
0136f8f0
AH
3132};
3133
17b658af
JJ
3134/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3135 is linear within a simd lane (but not within the whole loop), note it in
3136 *ARGINFO. */
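
/* An illustrative sketch of the pattern this helper walks (the names
   and the step of 4 are hypothetical, not from the original source):

       _1 = GOMP_SIMD_LANE (simduid.0);
       _2 = _1 * 4;                       // MULT_EXPR by a constant step
       _3 = _2 + base_off;                // optional PLUS_EXPR by a constant
       op = &a + _3;                      // POINTER_PLUS_EXPR in DEF_STMT

   Such an address advances by a constant step from lane to lane, so it
   is linear within a simd lane even when it is not a simple induction
   over the whole loop. */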
3137
3138static void
3139vect_simd_lane_linear (tree op, struct loop *loop,
3140 struct simd_call_arg_info *arginfo)
3141{
355fe088 3142 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3143
3144 if (!is_gimple_assign (def_stmt)
3145 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3146 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3147 return;
3148
3149 tree base = gimple_assign_rhs1 (def_stmt);
3150 HOST_WIDE_INT linear_step = 0;
3151 tree v = gimple_assign_rhs2 (def_stmt);
3152 while (TREE_CODE (v) == SSA_NAME)
3153 {
3154 tree t;
3155 def_stmt = SSA_NAME_DEF_STMT (v);
3156 if (is_gimple_assign (def_stmt))
3157 switch (gimple_assign_rhs_code (def_stmt))
3158 {
3159 case PLUS_EXPR:
3160 t = gimple_assign_rhs2 (def_stmt);
3161 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3162 return;
3163 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3164 v = gimple_assign_rhs1 (def_stmt);
3165 continue;
3166 case MULT_EXPR:
3167 t = gimple_assign_rhs2 (def_stmt);
3168 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3169 return;
3170 linear_step = tree_to_shwi (t);
3171 v = gimple_assign_rhs1 (def_stmt);
3172 continue;
3173 CASE_CONVERT:
3174 t = gimple_assign_rhs1 (def_stmt);
3175 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3176 || (TYPE_PRECISION (TREE_TYPE (v))
3177 < TYPE_PRECISION (TREE_TYPE (t))))
3178 return;
3179 if (!linear_step)
3180 linear_step = 1;
3181 v = t;
3182 continue;
3183 default:
3184 return;
3185 }
8e4284d0 3186 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3187 && loop->simduid
3188 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3189 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3190 == loop->simduid))
3191 {
3192 if (!linear_step)
3193 linear_step = 1;
3194 arginfo->linear_step = linear_step;
3195 arginfo->op = base;
3196 arginfo->simd_lane_linear = true;
3197 return;
3198 }
3199 }
3200}
3201
0136f8f0
AH
3202/* Function vectorizable_simd_clone_call.
3203
3204 Check if STMT performs a function call that can be vectorized
3205 by calling a simd clone of the function.
3206 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3207 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3208 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
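
/* A hedged example of the transformation (the clone name below follows
   the vector-function-ABI mangling and is illustrative only): with a
   4-lane clone of foo available, a scalar call in the loop body

       x_1 = foo (a_2);

   becomes one call to the clone per vector iteration,

       vect_x = _ZGVbN4v_foo (vect_a);

   The loop below scores each available clone ("badness") and picks the
   cheapest usable one. */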
3209
3210static bool
355fe088
TS
3211vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3212 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3213{
3214 tree vec_dest;
3215 tree scalar_dest;
3216 tree op, type;
3217 tree vec_oprnd0 = NULL_TREE;
3218 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3219 tree vectype;
3220 unsigned int nunits;
3221 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3222 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3223 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3224 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3225 tree fndecl, new_temp;
355fe088
TS
3226 gimple *def_stmt;
3227 gimple *new_stmt = NULL;
0136f8f0 3228 int ncopies, j;
00426f9a 3229 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3230 vec<tree> vargs = vNULL;
3231 size_t i, nargs;
3232 tree lhs, rtype, ratype;
e7a74006 3233 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3234
3235 /* Is STMT a vectorizable call? */
3236 if (!is_gimple_call (stmt))
3237 return false;
3238
3239 fndecl = gimple_call_fndecl (stmt);
3240 if (fndecl == NULL_TREE)
3241 return false;
3242
d52f5295 3243 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3244 if (node == NULL || node->simd_clones == NULL)
3245 return false;
3246
3247 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3248 return false;
3249
66c16fd9
RB
3250 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3251 && ! vec_stmt)
0136f8f0
AH
3252 return false;
3253
3254 if (gimple_call_lhs (stmt)
3255 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3256 return false;
3257
3258 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3259
3260 vectype = STMT_VINFO_VECTYPE (stmt_info);
3261
3262 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3263 return false;
3264
3265 /* FORNOW */
fce57248 3266 if (slp_node)
0136f8f0
AH
3267 return false;
3268
3269 /* Process function arguments. */
3270 nargs = gimple_call_num_args (stmt);
3271
3272 /* Bail out if the function has zero arguments. */
3273 if (nargs == 0)
3274 return false;
3275
00426f9a 3276 arginfo.reserve (nargs, true);
0136f8f0
AH
3277
3278 for (i = 0; i < nargs; i++)
3279 {
3280 simd_call_arg_info thisarginfo;
3281 affine_iv iv;
3282
3283 thisarginfo.linear_step = 0;
3284 thisarginfo.align = 0;
3285 thisarginfo.op = NULL_TREE;
17b658af 3286 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3287
3288 op = gimple_call_arg (stmt, i);
81c40241
RB
3289 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3290 &thisarginfo.vectype)
0136f8f0
AH
3291 || thisarginfo.dt == vect_uninitialized_def)
3292 {
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3295 "use not simple.\n");
0136f8f0
AH
3296 return false;
3297 }
3298
3299 if (thisarginfo.dt == vect_constant_def
3300 || thisarginfo.dt == vect_external_def)
3301 gcc_assert (thisarginfo.vectype == NULL_TREE);
3302 else
3303 gcc_assert (thisarginfo.vectype != NULL_TREE);
3304
6c9e85fb
JJ
3305 /* For linear arguments, the analysis phase should have saved
3306 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
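/* As the accesses below assume, that vector is laid out as: element 0
   holds the chosen clone's decl, followed by one triple per argument:
   [i*3+1] the base value, [i*3+2] the linear step (present only for
   linear arguments), and [i*3+3] a boolean node recording whether the
   argument is linear only within a simd lane. */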
17b658af
JJ
3307 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3308 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3309 {
3310 gcc_assert (vec_stmt);
3311 thisarginfo.linear_step
17b658af 3312 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3313 thisarginfo.op
17b658af
JJ
3314 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3315 thisarginfo.simd_lane_linear
3316 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3317 == boolean_true_node);
6c9e85fb
JJ
3318 /* If the loop has been peeled for alignment, we need to adjust it. */
3319 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3320 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3321 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3322 {
3323 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3324 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3325 tree opt = TREE_TYPE (thisarginfo.op);
3326 bias = fold_convert (TREE_TYPE (step), bias);
3327 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3328 thisarginfo.op
3329 = fold_build2 (POINTER_TYPE_P (opt)
3330 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3331 thisarginfo.op, bias);
3332 }
3333 }
3334 else if (!vec_stmt
3335 && thisarginfo.dt != vect_constant_def
3336 && thisarginfo.dt != vect_external_def
3337 && loop_vinfo
3338 && TREE_CODE (op) == SSA_NAME
3339 && simple_iv (loop, loop_containing_stmt (stmt), op,
3340 &iv, false)
3341 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3342 {
3343 thisarginfo.linear_step = tree_to_shwi (iv.step);
3344 thisarginfo.op = iv.base;
3345 }
3346 else if ((thisarginfo.dt == vect_constant_def
3347 || thisarginfo.dt == vect_external_def)
3348 && POINTER_TYPE_P (TREE_TYPE (op)))
3349 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3350 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3351 linear too. */
3352 if (POINTER_TYPE_P (TREE_TYPE (op))
3353 && !thisarginfo.linear_step
3354 && !vec_stmt
3355 && thisarginfo.dt != vect_constant_def
3356 && thisarginfo.dt != vect_external_def
3357 && loop_vinfo
3358 && !slp_node
3359 && TREE_CODE (op) == SSA_NAME)
3360 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3361
3362 arginfo.quick_push (thisarginfo);
3363 }
3364
3365 unsigned int badness = 0;
3366 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3367 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3368 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3369 else
3370 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3371 n = n->simdclone->next_clone)
3372 {
3373 unsigned int this_badness = 0;
3374 if (n->simdclone->simdlen
3375 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3376 || n->simdclone->nargs != nargs)
3377 continue;
3378 if (n->simdclone->simdlen
3379 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3380 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3381 - exact_log2 (n->simdclone->simdlen)) * 1024;
3382 if (n->simdclone->inbranch)
3383 this_badness += 2048;
3384 int target_badness = targetm.simd_clone.usable (n);
3385 if (target_badness < 0)
3386 continue;
3387 this_badness += target_badness * 512;
3388 /* FORNOW: Have to add code to add the mask argument. */
3389 if (n->simdclone->inbranch)
3390 continue;
3391 for (i = 0; i < nargs; i++)
3392 {
3393 switch (n->simdclone->args[i].arg_type)
3394 {
3395 case SIMD_CLONE_ARG_TYPE_VECTOR:
3396 if (!useless_type_conversion_p
3397 (n->simdclone->args[i].orig_type,
3398 TREE_TYPE (gimple_call_arg (stmt, i))))
3399 i = -1;
3400 else if (arginfo[i].dt == vect_constant_def
3401 || arginfo[i].dt == vect_external_def
3402 || arginfo[i].linear_step)
3403 this_badness += 64;
3404 break;
3405 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3406 if (arginfo[i].dt != vect_constant_def
3407 && arginfo[i].dt != vect_external_def)
3408 i = -1;
3409 break;
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3411 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3412 if (arginfo[i].dt == vect_constant_def
3413 || arginfo[i].dt == vect_external_def
3414 || (arginfo[i].linear_step
3415 != n->simdclone->args[i].linear_step))
3416 i = -1;
3417 break;
3418 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3419 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3420 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3421 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3422 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3423 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3424 /* FORNOW */
3425 i = -1;
3426 break;
3427 case SIMD_CLONE_ARG_TYPE_MASK:
3428 gcc_unreachable ();
3429 }
3430 if (i == (size_t) -1)
3431 break;
3432 if (n->simdclone->args[i].alignment > arginfo[i].align)
3433 {
3434 i = -1;
3435 break;
3436 }
3437 if (arginfo[i].align)
3438 this_badness += (exact_log2 (arginfo[i].align)
3439 - exact_log2 (n->simdclone->args[i].alignment));
3440 }
3441 if (i == (size_t) -1)
3442 continue;
3443 if (bestn == NULL || this_badness < badness)
3444 {
3445 bestn = n;
3446 badness = this_badness;
3447 }
3448 }
3449
3450 if (bestn == NULL)
00426f9a 3451 return false;
0136f8f0
AH
3452
3453 for (i = 0; i < nargs; i++)
3454 if ((arginfo[i].dt == vect_constant_def
3455 || arginfo[i].dt == vect_external_def)
3456 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3457 {
3458 arginfo[i].vectype
3459 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3460 i)));
3461 if (arginfo[i].vectype == NULL
3462 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3463 > bestn->simdclone->simdlen))
00426f9a 3464 return false;
0136f8f0
AH
3465 }
3466
3467 fndecl = bestn->decl;
3468 nunits = bestn->simdclone->simdlen;
3469 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3470
3471 /* If the function isn't const, only allow it in simd loops where the user
3472 has asserted that at least nunits consecutive iterations can be
3473 performed using SIMD instructions. */
3474 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3475 && gimple_vuse (stmt))
00426f9a 3476 return false;
0136f8f0
AH
3477
3478 /* Sanity check: make sure that at least one copy of the vectorized stmt
3479 needs to be generated. */
3480 gcc_assert (ncopies >= 1);
3481
3482 if (!vec_stmt) /* transformation not required. */
3483 {
6c9e85fb
JJ
3484 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3485 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3486 if ((bestn->simdclone->args[i].arg_type
3487 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3488 || (bestn->simdclone->args[i].arg_type
3489 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3490 {
17b658af 3491 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3492 + 1);
3493 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3494 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3495 ? size_type_node : TREE_TYPE (arginfo[i].op);
3496 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3497 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3498 tree sll = arginfo[i].simd_lane_linear
3499 ? boolean_true_node : boolean_false_node;
3500 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3501 }
0136f8f0
AH
3502 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3503 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_NOTE, vect_location,
3505 "=== vectorizable_simd_clone_call ===\n");
3506/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3507 return true;
3508 }
3509
67b8dbac 3510 /* Transform. */
0136f8f0
AH
3511
3512 if (dump_enabled_p ())
3513 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3514
3515 /* Handle def. */
3516 scalar_dest = gimple_call_lhs (stmt);
3517 vec_dest = NULL_TREE;
3518 rtype = NULL_TREE;
3519 ratype = NULL_TREE;
3520 if (scalar_dest)
3521 {
3522 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3523 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3524 if (TREE_CODE (rtype) == ARRAY_TYPE)
3525 {
3526 ratype = rtype;
3527 rtype = TREE_TYPE (ratype);
3528 }
3529 }
3530
3531 prev_stmt_info = NULL;
3532 for (j = 0; j < ncopies; ++j)
3533 {
3534 /* Build argument list for the vectorized call. */
3535 if (j == 0)
3536 vargs.create (nargs);
3537 else
3538 vargs.truncate (0);
3539
3540 for (i = 0; i < nargs; i++)
3541 {
3542 unsigned int k, l, m, o;
3543 tree atype;
3544 op = gimple_call_arg (stmt, i);
3545 switch (bestn->simdclone->args[i].arg_type)
3546 {
3547 case SIMD_CLONE_ARG_TYPE_VECTOR:
3548 atype = bestn->simdclone->args[i].vector_type;
3549 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3550 for (m = j * o; m < (j + 1) * o; m++)
3551 {
3552 if (TYPE_VECTOR_SUBPARTS (atype)
3553 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3554 {
3555 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3556 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3557 / TYPE_VECTOR_SUBPARTS (atype));
3558 gcc_assert ((k & (k - 1)) == 0);
3559 if (m == 0)
3560 vec_oprnd0
81c40241 3561 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3562 else
3563 {
3564 vec_oprnd0 = arginfo[i].op;
3565 if ((m & (k - 1)) == 0)
3566 vec_oprnd0
3567 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3568 vec_oprnd0);
3569 }
3570 arginfo[i].op = vec_oprnd0;
3571 vec_oprnd0
3572 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3573 bitsize_int (prec),
0136f8f0
AH
3574 bitsize_int ((m & (k - 1)) * prec));
3575 new_stmt
b731b390 3576 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3577 vec_oprnd0);
3578 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3579 vargs.safe_push (gimple_assign_lhs (new_stmt));
3580 }
3581 else
3582 {
3583 k = (TYPE_VECTOR_SUBPARTS (atype)
3584 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3585 gcc_assert ((k & (k - 1)) == 0);
3586 vec<constructor_elt, va_gc> *ctor_elts;
3587 if (k != 1)
3588 vec_alloc (ctor_elts, k);
3589 else
3590 ctor_elts = NULL;
3591 for (l = 0; l < k; l++)
3592 {
3593 if (m == 0 && l == 0)
3594 vec_oprnd0
81c40241 3595 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3596 else
3597 vec_oprnd0
3598 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3599 arginfo[i].op);
3600 arginfo[i].op = vec_oprnd0;
3601 if (k == 1)
3602 break;
3603 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3604 vec_oprnd0);
3605 }
3606 if (k == 1)
3607 vargs.safe_push (vec_oprnd0);
3608 else
3609 {
3610 vec_oprnd0 = build_constructor (atype, ctor_elts);
3611 new_stmt
b731b390 3612 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3613 vec_oprnd0);
3614 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3615 vargs.safe_push (gimple_assign_lhs (new_stmt));
3616 }
3617 }
3618 }
3619 break;
3620 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3621 vargs.safe_push (op);
3622 break;
3623 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3624 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3625 if (j == 0)
3626 {
3627 gimple_seq stmts;
3628 arginfo[i].op
3629 = force_gimple_operand (arginfo[i].op, &stmts, true,
3630 NULL_TREE);
3631 if (stmts != NULL)
3632 {
3633 basic_block new_bb;
3634 edge pe = loop_preheader_edge (loop);
3635 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3636 gcc_assert (!new_bb);
3637 }
17b658af
JJ
3638 if (arginfo[i].simd_lane_linear)
3639 {
3640 vargs.safe_push (arginfo[i].op);
3641 break;
3642 }
b731b390 3643 tree phi_res = copy_ssa_name (op);
538dd0b7 3644 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3645 set_vinfo_for_stmt (new_phi,
310213d4 3646 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3647 add_phi_arg (new_phi, arginfo[i].op,
3648 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3649 enum tree_code code
3650 = POINTER_TYPE_P (TREE_TYPE (op))
3651 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3652 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3653 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3654 widest_int cst
3655 = wi::mul (bestn->simdclone->args[i].linear_step,
3656 ncopies * nunits);
3657 tree tcst = wide_int_to_tree (type, cst);
b731b390 3658 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3659 new_stmt
3660 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3661 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3662 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3663 set_vinfo_for_stmt (new_stmt,
310213d4 3664 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
3665 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3666 UNKNOWN_LOCATION);
3667 arginfo[i].op = phi_res;
3668 vargs.safe_push (phi_res);
3669 }
3670 else
3671 {
3672 enum tree_code code
3673 = POINTER_TYPE_P (TREE_TYPE (op))
3674 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3675 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3676 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3677 widest_int cst
3678 = wi::mul (bestn->simdclone->args[i].linear_step,
3679 j * nunits);
3680 tree tcst = wide_int_to_tree (type, cst);
b731b390 3681 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3682 new_stmt = gimple_build_assign (new_temp, code,
3683 arginfo[i].op, tcst);
0136f8f0
AH
3684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3685 vargs.safe_push (new_temp);
3686 }
3687 break;
7adb26f2
JJ
3688 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3689 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3690 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
3691 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3692 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3693 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3694 default:
3695 gcc_unreachable ();
3696 }
3697 }
3698
3699 new_stmt = gimple_build_call_vec (fndecl, vargs);
3700 if (vec_dest)
3701 {
3702 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3703 if (ratype)
b731b390 3704 new_temp = create_tmp_var (ratype);
0136f8f0
AH
3705 else if (TYPE_VECTOR_SUBPARTS (vectype)
3706 == TYPE_VECTOR_SUBPARTS (rtype))
3707 new_temp = make_ssa_name (vec_dest, new_stmt);
3708 else
3709 new_temp = make_ssa_name (rtype, new_stmt);
3710 gimple_call_set_lhs (new_stmt, new_temp);
3711 }
3712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3713
3714 if (vec_dest)
3715 {
3716 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3717 {
3718 unsigned int k, l;
3719 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3720 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3721 gcc_assert ((k & (k - 1)) == 0);
3722 for (l = 0; l < k; l++)
3723 {
3724 tree t;
3725 if (ratype)
3726 {
3727 t = build_fold_addr_expr (new_temp);
3728 t = build2 (MEM_REF, vectype, t,
3729 build_int_cst (TREE_TYPE (t),
3730 l * prec / BITS_PER_UNIT));
3731 }
3732 else
3733 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 3734 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 3735 new_stmt
b731b390 3736 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3737 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3738 if (j == 0 && l == 0)
3739 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3740 else
3741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3742
3743 prev_stmt_info = vinfo_for_stmt (new_stmt);
3744 }
3745
3746 if (ratype)
3747 {
3748 tree clobber = build_constructor (ratype, NULL);
3749 TREE_THIS_VOLATILE (clobber) = 1;
3750 new_stmt = gimple_build_assign (new_temp, clobber);
3751 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3752 }
3753 continue;
3754 }
3755 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3756 {
3757 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3758 / TYPE_VECTOR_SUBPARTS (rtype));
3759 gcc_assert ((k & (k - 1)) == 0);
3760 if ((j & (k - 1)) == 0)
3761 vec_alloc (ret_ctor_elts, k);
3762 if (ratype)
3763 {
3764 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3765 for (m = 0; m < o; m++)
3766 {
3767 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3768 size_int (m), NULL_TREE, NULL_TREE);
3769 new_stmt
b731b390 3770 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3771 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3772 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3773 gimple_assign_lhs (new_stmt));
3774 }
3775 tree clobber = build_constructor (ratype, NULL);
3776 TREE_THIS_VOLATILE (clobber) = 1;
3777 new_stmt = gimple_build_assign (new_temp, clobber);
3778 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3779 }
3780 else
3781 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3782 if ((j & (k - 1)) != k - 1)
3783 continue;
3784 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3785 new_stmt
b731b390 3786 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3787 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3788
3789 if ((unsigned) j == k - 1)
3790 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3791 else
3792 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3793
3794 prev_stmt_info = vinfo_for_stmt (new_stmt);
3795 continue;
3796 }
3797 else if (ratype)
3798 {
3799 tree t = build_fold_addr_expr (new_temp);
3800 t = build2 (MEM_REF, vectype, t,
3801 build_int_cst (TREE_TYPE (t), 0));
3802 new_stmt
b731b390 3803 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3804 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3805 tree clobber = build_constructor (ratype, NULL);
3806 TREE_THIS_VOLATILE (clobber) = 1;
3807 vect_finish_stmt_generation (stmt,
3808 gimple_build_assign (new_temp,
3809 clobber), gsi);
3810 }
3811 }
3812
3813 if (j == 0)
3814 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3815 else
3816 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3817
3818 prev_stmt_info = vinfo_for_stmt (new_stmt);
3819 }
3820
3821 vargs.release ();
3822
3823 /* The call in STMT might prevent it from being removed by DCE.
3824 However, we cannot remove it here, due to the way the SSA name
3825 it defines is mapped to the new definition. So just replace the
3826 rhs of the statement with something harmless. */
3827
3828 if (slp_node)
3829 return true;
3830
3831 if (scalar_dest)
3832 {
3833 type = TREE_TYPE (scalar_dest);
3834 if (is_pattern_stmt_p (stmt_info))
3835 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3836 else
3837 lhs = gimple_call_lhs (stmt);
3838 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3839 }
3840 else
3841 new_stmt = gimple_build_nop ();
3842 set_vinfo_for_stmt (new_stmt, stmt_info);
3843 set_vinfo_for_stmt (stmt, NULL);
3844 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3845 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3846 unlink_stmt_vdef (stmt);
3847
3848 return true;
3849}
3850
3851
ebfd146a
IR
3852/* Function vect_gen_widened_results_half
3853
3854 Create a vector stmt whose code is CODE, whose number of arguments is
b8698a0f 3855 given by OP_TYPE, and whose result variable is VEC_DEST; its arguments
ff802fa1 3856 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
ebfd146a
IR
3857 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3858 needs to be created (DECL is a function-decl of a target-builtin).
3859 STMT is the original scalar stmt that we are vectorizing. */
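
/* For instance (a sketch; the actual opcodes come from
   supportable_widening_operation and are target-dependent), widening a
   multiplication of vector(8) short operands takes two stmts, one per
   half of the result:

       vect_lo = VEC_WIDEN_MULT_LO_EXPR <vect_a, vect_b>;  // vector(4) int
       vect_hi = VEC_WIDEN_MULT_HI_EXPR <vect_a, vect_b>;  // vector(4) int

   This helper emits one such half per call. */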
3860
355fe088 3861static gimple *
ebfd146a
IR
3862vect_gen_widened_results_half (enum tree_code code,
3863 tree decl,
3864 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3865 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3866 gimple *stmt)
b8698a0f 3867{
355fe088 3868 gimple *new_stmt;
b8698a0f
L
3869 tree new_temp;
3870
3871 /* Generate half of the widened result: */
3872 if (code == CALL_EXPR)
3873 {
3874 /* Target specific support */
ebfd146a
IR
3875 if (op_type == binary_op)
3876 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3877 else
3878 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3879 new_temp = make_ssa_name (vec_dest, new_stmt);
3880 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3881 }
3882 else
ebfd146a 3883 {
b8698a0f
L
3884 /* Generic support */
3885 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3886 if (op_type != binary_op)
3887 vec_oprnd1 = NULL;
0d0e4a03 3888 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3889 new_temp = make_ssa_name (vec_dest, new_stmt);
3890 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3891 }
ebfd146a
IR
3892 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3893
ebfd146a
IR
3894 return new_stmt;
3895}
3896
4a00c761
JJ
3897
3898/* Get vectorized definitions for loop-based vectorization. For the first
3899 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3900 the scalar operand), and for the rest we get a copy with
3901 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3902 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3903 The vectors are collected into VEC_OPRNDS. */
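
/* A small worked example (MULTI_STEP_CVT == 1 is assumed): the first
   call pushes two defs and the recursive call two more, so VEC_OPRNDS
   ends up holding four defs,

       d0, d1 = copy (d0), d2 = copy (d1), d3 = copy (d2)

   matching the 2x reduction in vector count per step of a multi-step
   narrowing. */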
3904
3905static void
355fe088 3906vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3907 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3908{
3909 tree vec_oprnd;
3910
3911 /* Get the first vector operand. */
3912 /* All the vector operands except the very first one (that is the scalar
3913 oprnd) are stmt copies. */
3914 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3915 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
3916 else
3917 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3918
9771b263 3919 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3920
3921 /* Get second vector operand. */
3922 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3923 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3924
3925 *oprnd = vec_oprnd;
3926
3927 /* For conversion in multiple steps, continue to get operands
3928 recursively. */
3929 if (multi_step_cvt)
3930 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3931}
3932
3933
3934/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3935 For multi-step conversions store the resulting vectors and call the function
3936 recursively. */
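
/* An illustrative two-step demotion, int -> char via short (assuming
   the target supports VEC_PACK_TRUNC for both steps): four vector(4)
   int operands are packed pairwise, then the recursive call packs the
   intermediate results:

       s0 = VEC_PACK_TRUNC_EXPR <i0, i1>;   // vector(8) short
       s1 = VEC_PACK_TRUNC_EXPR <i2, i3>;   // vector(8) short
       c0 = VEC_PACK_TRUNC_EXPR <s0, s1>;   // vector(16) char  */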
3937
3938static void
9771b263 3939vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 3940 int multi_step_cvt, gimple *stmt,
9771b263 3941 vec<tree> vec_dsts,
4a00c761
JJ
3942 gimple_stmt_iterator *gsi,
3943 slp_tree slp_node, enum tree_code code,
3944 stmt_vec_info *prev_stmt_info)
3945{
3946 unsigned int i;
3947 tree vop0, vop1, new_tmp, vec_dest;
355fe088 3948 gimple *new_stmt;
4a00c761
JJ
3949 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3950
9771b263 3951 vec_dest = vec_dsts.pop ();
4a00c761 3952
9771b263 3953 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3954 {
3955 /* Create demotion operation. */
9771b263
DN
3956 vop0 = (*vec_oprnds)[i];
3957 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3958 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
3959 new_tmp = make_ssa_name (vec_dest, new_stmt);
3960 gimple_assign_set_lhs (new_stmt, new_tmp);
3961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3962
3963 if (multi_step_cvt)
3964 /* Store the resulting vector for next recursive call. */
9771b263 3965 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3966 else
3967 {
3968 /* This is the last step of the conversion sequence. Store the
3969 vectors in SLP_NODE or in the vector info of the scalar statement
3970 (or in the STMT_VINFO_RELATED_STMT chain). */
3971 if (slp_node)
9771b263 3972 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 3973 else
c689ce1e
RB
3974 {
3975 if (!*prev_stmt_info)
3976 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3977 else
3978 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 3979
c689ce1e
RB
3980 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3981 }
4a00c761
JJ
3982 }
3983 }
3984
3985 /* For multi-step demotion operations we first generate demotion operations
3986 from the source type to the intermediate types, and then combine the
3987 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3988 type. */
3989 if (multi_step_cvt)
3990 {
3991 /* At each level of recursion we have half of the operands we had at the
3992 previous level. */
9771b263 3993 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
3994 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3995 stmt, vec_dsts, gsi, slp_node,
3996 VEC_PACK_TRUNC_EXPR,
3997 prev_stmt_info);
3998 }
3999
9771b263 4000 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4001}
4002
4003
4004/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4005 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4006 the resulting vectors and call the function recursively. */
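
/* Sketch of one promotion step (the lo/hi opcodes are whatever the
   caller obtained from supportable_widening_operation): every input
   vector yields two wider vectors,

       w0 = VEC_UNPACK_LO_EXPR <v0>;
       w1 = VEC_UNPACK_HI_EXPR <v0>;

   so after each step VEC_OPRNDS0 holds twice as many vectors of twice
   the element width. */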
4007
4008static void
9771b263
DN
4009vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4010 vec<tree> *vec_oprnds1,
355fe088 4011 gimple *stmt, tree vec_dest,
4a00c761
JJ
4012 gimple_stmt_iterator *gsi,
4013 enum tree_code code1,
4014 enum tree_code code2, tree decl1,
4015 tree decl2, int op_type)
4016{
4017 int i;
4018 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4019 gimple *new_stmt1, *new_stmt2;
6e1aa848 4020 vec<tree> vec_tmp = vNULL;
4a00c761 4021
9771b263
DN
4022 vec_tmp.create (vec_oprnds0->length () * 2);
4023 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4024 {
4025 if (op_type == binary_op)
9771b263 4026 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4027 else
4028 vop1 = NULL_TREE;
4029
4030 /* Generate the two halves of promotion operation. */
4031 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4032 op_type, vec_dest, gsi, stmt);
4033 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4034 op_type, vec_dest, gsi, stmt);
4035 if (is_gimple_call (new_stmt1))
4036 {
4037 new_tmp1 = gimple_call_lhs (new_stmt1);
4038 new_tmp2 = gimple_call_lhs (new_stmt2);
4039 }
4040 else
4041 {
4042 new_tmp1 = gimple_assign_lhs (new_stmt1);
4043 new_tmp2 = gimple_assign_lhs (new_stmt2);
4044 }
4045
4046 /* Store the results for the next step. */
9771b263
DN
4047 vec_tmp.quick_push (new_tmp1);
4048 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4049 }
4050
689eaba3 4051 vec_oprnds0->release ();
4a00c761
JJ
4052 *vec_oprnds0 = vec_tmp;
4053}
4054
4055
b8698a0f
L
4056/* Check if STMT performs a conversion operation that can be vectorized.
4057 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4058 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4059 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
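
/* Rough examples of the three modifiers classified below (lane counts
   assume 128-bit vectors and are illustrative only):

       NONE:   vector(4) int -> vector(4) float      same lane count
       WIDEN:  vector(8) short -> 2x vector(4) int   fewer, wider lanes
       NARROW: 2x vector(4) int -> vector(8) short   more, narrower lanes */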
4060
4061static bool
355fe088
TS
4062vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4063 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4064{
4065 tree vec_dest;
4066 tree scalar_dest;
4a00c761 4067 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4068 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4069 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4070 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4071 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4072 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4073 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4074 tree new_temp;
355fe088 4075 gimple *def_stmt;
ebfd146a 4076 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4077 int ndts = 2;
355fe088 4078 gimple *new_stmt = NULL;
ebfd146a
IR
4079 stmt_vec_info prev_stmt_info;
4080 int nunits_in;
4081 int nunits_out;
4082 tree vectype_out, vectype_in;
4a00c761
JJ
4083 int ncopies, i, j;
4084 tree lhs_type, rhs_type;
ebfd146a 4085 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4086 vec<tree> vec_oprnds0 = vNULL;
4087 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4088 tree vop0;
4a00c761 4089 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4090 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4091 int multi_step_cvt = 0;
6e1aa848 4092 vec<tree> interm_types = vNULL;
4a00c761
JJ
4093 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4094 int op_type;
4a00c761 4095 unsigned short fltsz;
ebfd146a
IR
4096
4097 /* Is STMT a vectorizable conversion? */
4098
4a00c761 4099 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4100 return false;
4101
66c16fd9
RB
4102 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4103 && ! vec_stmt)
ebfd146a
IR
4104 return false;
4105
4106 if (!is_gimple_assign (stmt))
4107 return false;
4108
4109 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4110 return false;
4111
4112 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4113 if (!CONVERT_EXPR_CODE_P (code)
4114 && code != FIX_TRUNC_EXPR
4115 && code != FLOAT_EXPR
4116 && code != WIDEN_MULT_EXPR
4117 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4118 return false;
4119
4a00c761
JJ
4120 op_type = TREE_CODE_LENGTH (code);
4121
ebfd146a 4122 /* Check types of lhs and rhs. */
b690cc0f 4123 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4124 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4125 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4126
ebfd146a
IR
4127 op0 = gimple_assign_rhs1 (stmt);
4128 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4129
4130 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4131 && !((INTEGRAL_TYPE_P (lhs_type)
4132 && INTEGRAL_TYPE_P (rhs_type))
4133 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4134 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4135 return false;
4136
e6f5c25d
IE
4137 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4138 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4139 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4140 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4141 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4142 {
73fbfcad 4143 if (dump_enabled_p ())
78c60e3d 4144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4145 "type conversion to/from bit-precision unsupported."
4146 "\n");
4a00c761
JJ
4147 return false;
4148 }
4149
b690cc0f 4150 /* Check the operands of the operation. */
81c40241 4151 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4152 {
73fbfcad 4153 if (dump_enabled_p ())
78c60e3d 4154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4155 "use not simple.\n");
b690cc0f
RG
4156 return false;
4157 }
4a00c761
JJ
4158 if (op_type == binary_op)
4159 {
4160 bool ok;
4161
4162 op1 = gimple_assign_rhs2 (stmt);
4163 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4164 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4165 OP1. */
4166 if (CONSTANT_CLASS_P (op0))
81c40241 4167 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4168 else
81c40241 4169 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4170
4171 if (!ok)
4172 {
73fbfcad 4173 if (dump_enabled_p ())
78c60e3d 4174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4175 "use not simple.\n");
4a00c761
JJ
4176 return false;
4177 }
4178 }
4179
b690cc0f
RG
4180 /* If op0 is an external or constant defs use a vector type of
4181 the same size as the output vector type. */
ebfd146a 4182 if (!vectype_in)
b690cc0f 4183 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4184 if (vec_stmt)
4185 gcc_assert (vectype_in);
4186 if (!vectype_in)
4187 {
73fbfcad 4188 if (dump_enabled_p ())
4a00c761 4189 {
78c60e3d
SS
4190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4191 "no vectype for scalar type ");
4192 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4193 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4194 }
7d8930a0
IR
4195
4196 return false;
4197 }
ebfd146a 4198
e6f5c25d
IE
4199 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4200 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4201 {
4202 if (dump_enabled_p ())
4203 {
4204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4205 "can't convert between boolean and non "
4206 "boolean vectors");
4207 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4208 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4209 }
4210
4211 return false;
4212 }
4213
b690cc0f
RG
4214 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4215 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 4216 if (nunits_in < nunits_out)
ebfd146a
IR
4217 modifier = NARROW;
4218 else if (nunits_out == nunits_in)
4219 modifier = NONE;
ebfd146a 4220 else
4a00c761 4221 modifier = WIDEN;
ebfd146a 4222
ff802fa1
IR
4223 /* Multiple types in SLP are handled by creating the appropriate number of
4224 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4225 case of SLP. */
fce57248 4226 if (slp_node)
ebfd146a 4227 ncopies = 1;
4a00c761 4228 else if (modifier == NARROW)
e8f142e2 4229 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4230 else
e8f142e2 4231 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4232
ebfd146a
IR
4233 /* Sanity check: make sure that at least one copy of the vectorized stmt
4234 needs to be generated. */
4235 gcc_assert (ncopies >= 1);
4236
16d22000
RS
4237 bool found_mode = false;
4238 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4239 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4240 opt_scalar_mode rhs_mode_iter;
b397965c 4241
ebfd146a 4242 /* Supportable by target? */
4a00c761 4243 switch (modifier)
ebfd146a 4244 {
4a00c761
JJ
4245 case NONE:
4246 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4247 return false;
4248 if (supportable_convert_operation (code, vectype_out, vectype_in,
4249 &decl1, &code1))
4250 break;
4251 /* FALLTHRU */
4252 unsupported:
73fbfcad 4253 if (dump_enabled_p ())
78c60e3d 4254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4255 "conversion not supported by target.\n");
ebfd146a 4256 return false;
ebfd146a 4257
4a00c761
JJ
4258 case WIDEN:
4259 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4260 &code1, &code2, &multi_step_cvt,
4261 &interm_types))
4a00c761
JJ
4262 {
4263 /* Binary widening operation can only be supported directly by the
4264 architecture. */
4265 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4266 break;
4267 }
4268
4269 if (code != FLOAT_EXPR
b397965c 4270 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4271 goto unsupported;
4272
b397965c 4273 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4274 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4275 {
16d22000 4276 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4277 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4278 break;
4279
4a00c761
JJ
4280 cvt_type
4281 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4282 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4283 if (cvt_type == NULL_TREE)
4284 goto unsupported;
4285
4286 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4287 {
4288 if (!supportable_convert_operation (code, vectype_out,
4289 cvt_type, &decl1, &codecvt1))
4290 goto unsupported;
4291 }
4292 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4293 cvt_type, &codecvt1,
4294 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4295 &interm_types))
4296 continue;
4297 else
4298 gcc_assert (multi_step_cvt == 0);
4299
4300 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4301 vectype_in, &code1, &code2,
4302 &multi_step_cvt, &interm_types))
16d22000
RS
4303 {
4304 found_mode = true;
4305 break;
4306 }
4a00c761
JJ
4307 }
4308
16d22000 4309 if (!found_mode)
4a00c761
JJ
4310 goto unsupported;
4311
4312 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4313 codecvt2 = ERROR_MARK;
4314 else
4315 {
4316 multi_step_cvt++;
9771b263 4317 interm_types.safe_push (cvt_type);
4a00c761
JJ
4318 cvt_type = NULL_TREE;
4319 }
4320 break;
4321
4322 case NARROW:
4323 gcc_assert (op_type == unary_op);
4324 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4325 &code1, &multi_step_cvt,
4326 &interm_types))
4327 break;
4328
4329 if (code != FIX_TRUNC_EXPR
b397965c 4330 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4331 goto unsupported;
4332
4a00c761
JJ
4333 cvt_type
4334 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4335 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4336 if (cvt_type == NULL_TREE)
4337 goto unsupported;
4338 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4339 &decl1, &codecvt1))
4340 goto unsupported;
4341 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4342 &code1, &multi_step_cvt,
4343 &interm_types))
4344 break;
4345 goto unsupported;
4346
4347 default:
4348 gcc_unreachable ();
ebfd146a
IR
4349 }
4350
4351 if (!vec_stmt) /* transformation not required. */
4352 {
73fbfcad 4353 if (dump_enabled_p ())
78c60e3d 4354 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4355 "=== vectorizable_conversion ===\n");
4a00c761 4356 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4357 {
4358 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4fc5ebf1 4359 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4360 }
4a00c761
JJ
4361 else if (modifier == NARROW)
4362 {
4363 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 4364 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4365 }
4366 else
4367 {
4368 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 4369 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4370 }
9771b263 4371 interm_types.release ();
ebfd146a
IR
4372 return true;
4373 }
4374
67b8dbac 4375 /* Transform. */
73fbfcad 4376 if (dump_enabled_p ())
78c60e3d 4377 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4378 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4379
4a00c761
JJ
4380 if (op_type == binary_op)
4381 {
4382 if (CONSTANT_CLASS_P (op0))
4383 op0 = fold_convert (TREE_TYPE (op1), op0);
4384 else if (CONSTANT_CLASS_P (op1))
4385 op1 = fold_convert (TREE_TYPE (op0), op1);
4386 }
4387
4388 /* In case of multi-step conversion, we first generate conversion operations
4389 to the intermediate types, and then from those types to the final one.
4390 We create vector destinations for the intermediate types (TYPES) received
4391 from supportable_*_operation, and store them in the correct order
4392 for future use in vect_create_vectorized_*_stmts (). */
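
/* For example (a hypothetical two-step narrowing int -> char with short
   as the intermediate type), VEC_DSTS ends up as

       { vec_dest (char), vec_dest (short) }

   and vect_create_vectorized_demotion_stmts pops it back to front, so
   each recursion level finds the destination for its own step on top. */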
8c681247 4393 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4394 vec_dest = vect_create_destination_var (scalar_dest,
4395 (cvt_type && modifier == WIDEN)
4396 ? cvt_type : vectype_out);
9771b263 4397 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4398
4399 if (multi_step_cvt)
4400 {
9771b263
DN
4401 for (i = interm_types.length () - 1;
4402 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4403 {
4404 vec_dest = vect_create_destination_var (scalar_dest,
4405 intermediate_type);
9771b263 4406 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4407 }
4408 }
ebfd146a 4409
4a00c761 4410 if (cvt_type)
82294ec1
JJ
4411 vec_dest = vect_create_destination_var (scalar_dest,
4412 modifier == WIDEN
4413 ? vectype_out : cvt_type);
4a00c761
JJ
4414
4415 if (!slp_node)
4416 {
30862efc 4417 if (modifier == WIDEN)
4a00c761 4418 {
c3284718 4419 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4420 if (op_type == binary_op)
9771b263 4421 vec_oprnds1.create (1);
4a00c761 4422 }
30862efc 4423 else if (modifier == NARROW)
9771b263
DN
4424 vec_oprnds0.create (
4425 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4426 }
4427 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4428 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4429
4a00c761 4430 last_oprnd = op0;
ebfd146a
IR
4431 prev_stmt_info = NULL;
4432 switch (modifier)
4433 {
4434 case NONE:
4435 for (j = 0; j < ncopies; j++)
4436 {
ebfd146a 4437 if (j == 0)
306b0c92 4438 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4439 else
4440 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4441
9771b263 4442 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4443 {
4444 /* Arguments are ready, create the new vector stmt. */
4445 if (code1 == CALL_EXPR)
4446 {
4447 new_stmt = gimple_build_call (decl1, 1, vop0);
4448 new_temp = make_ssa_name (vec_dest, new_stmt);
4449 gimple_call_set_lhs (new_stmt, new_temp);
4450 }
4451 else
4452 {
4453 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4454 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4455 new_temp = make_ssa_name (vec_dest, new_stmt);
4456 gimple_assign_set_lhs (new_stmt, new_temp);
4457 }
4458
4459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4460 if (slp_node)
9771b263 4461 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4462 else
4463 {
4464 if (!prev_stmt_info)
4465 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4466 else
4467 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4468 prev_stmt_info = vinfo_for_stmt (new_stmt);
4469 }
4a00c761 4470 }
ebfd146a
IR
4471 }
4472 break;
4473
4474 case WIDEN:
4475 /* In case the vectorization factor (VF) is bigger than the number
4476 of elements that we can fit in a vectype (nunits), we have to
4477 generate more than one vector stmt - i.e., we need to "unroll"
4478 the vector stmt by a factor VF/nunits. */
4479 for (j = 0; j < ncopies; j++)
4480 {
4a00c761 4481 /* Handle uses. */
ebfd146a 4482 if (j == 0)
4a00c761
JJ
4483 {
4484 if (slp_node)
4485 {
4486 if (code == WIDEN_LSHIFT_EXPR)
4487 {
4488 unsigned int k;
ebfd146a 4489
4a00c761
JJ
4490 vec_oprnd1 = op1;
4491 /* Store vec_oprnd1 for every vector stmt to be created
4492 for SLP_NODE. We check during the analysis that all
4493 the shift arguments are the same. */
4494 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4495 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4496
4497 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4498 slp_node);
4a00c761
JJ
4499 }
4500 else
4501 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4502 &vec_oprnds1, slp_node);
4a00c761
JJ
4503 }
4504 else
4505 {
81c40241 4506 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4507 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4508 if (op_type == binary_op)
4509 {
4510 if (code == WIDEN_LSHIFT_EXPR)
4511 vec_oprnd1 = op1;
4512 else
81c40241 4513 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4514 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4515 }
4516 }
4517 }
ebfd146a 4518 else
4a00c761
JJ
4519 {
4520 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4521 vec_oprnds0.truncate (0);
4522 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4523 if (op_type == binary_op)
4524 {
4525 if (code == WIDEN_LSHIFT_EXPR)
4526 vec_oprnd1 = op1;
4527 else
4528 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4529 vec_oprnd1);
9771b263
DN
4530 vec_oprnds1.truncate (0);
4531 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4532 }
4533 }
ebfd146a 4534
4a00c761
JJ
4535 /* Arguments are ready. Create the new vector stmts. */
4536 for (i = multi_step_cvt; i >= 0; i--)
4537 {
9771b263 4538 tree this_dest = vec_dsts[i];
4a00c761
JJ
4539 enum tree_code c1 = code1, c2 = code2;
4540 if (i == 0 && codecvt2 != ERROR_MARK)
4541 {
4542 c1 = codecvt1;
4543 c2 = codecvt2;
4544 }
4545 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4546 &vec_oprnds1,
4547 stmt, this_dest, gsi,
4548 c1, c2, decl1, decl2,
4549 op_type);
4550 }
4551
9771b263 4552 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4553 {
4554 if (cvt_type)
4555 {
4556 if (codecvt1 == CALL_EXPR)
4557 {
4558 new_stmt = gimple_build_call (decl1, 1, vop0);
4559 new_temp = make_ssa_name (vec_dest, new_stmt);
4560 gimple_call_set_lhs (new_stmt, new_temp);
4561 }
4562 else
4563 {
4564 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4565 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4566 new_stmt = gimple_build_assign (new_temp, codecvt1,
4567 vop0);
4a00c761
JJ
4568 }
4569
4570 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4571 }
4572 else
4573 new_stmt = SSA_NAME_DEF_STMT (vop0);
4574
4575 if (slp_node)
9771b263 4576 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4577 else
c689ce1e
RB
4578 {
4579 if (!prev_stmt_info)
4580 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4581 else
4582 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4583 prev_stmt_info = vinfo_for_stmt (new_stmt);
4584 }
4a00c761 4585 }
ebfd146a 4586 }
4a00c761
JJ
4587
4588 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4589 break;
4590
4591 case NARROW:
4592 /* In case the vectorization factor (VF) is bigger than the number
4593 of elements that we can fit in a vectype (nunits), we have to
4594 generate more than one vector stmt - i.e., we need to "unroll"
4595 the vector stmt by a factor VF/nunits. */
4596 for (j = 0; j < ncopies; j++)
4597 {
4598 /* Handle uses. */
4a00c761
JJ
4599 if (slp_node)
4600 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4601 slp_node);
ebfd146a
IR
4602 else
4603 {
9771b263 4604 vec_oprnds0.truncate (0);
4a00c761
JJ
4605 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4606 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4607 }
4608
4a00c761
JJ
4609 /* Arguments are ready. Create the new vector stmts. */
4610 if (cvt_type)
9771b263 4611 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4612 {
4613 if (codecvt1 == CALL_EXPR)
4614 {
4615 new_stmt = gimple_build_call (decl1, 1, vop0);
4616 new_temp = make_ssa_name (vec_dest, new_stmt);
4617 gimple_call_set_lhs (new_stmt, new_temp);
4618 }
4619 else
4620 {
4621 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4622 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4623 new_stmt = gimple_build_assign (new_temp, codecvt1,
4624 vop0);
4a00c761 4625 }
ebfd146a 4626
4a00c761 4627 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4628 vec_oprnds0[i] = new_temp;
4a00c761 4629 }
ebfd146a 4630
4a00c761
JJ
4631 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4632 stmt, vec_dsts, gsi,
4633 slp_node, code1,
4634 &prev_stmt_info);
ebfd146a
IR
4635 }
4636
4637 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4638 break;
ebfd146a
IR
4639 }
4640
9771b263
DN
4641 vec_oprnds0.release ();
4642 vec_oprnds1.release ();
9771b263 4643 interm_types.release ();
ebfd146a
IR
4644
4645 return true;
4646}
ff802fa1
IR
4647
4648
ebfd146a
IR
4649/* Function vectorizable_assignment.
4650
b8698a0f
L
4651 Check if STMT performs an assignment (copy) that can be vectorized.
4652 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4653 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4654 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
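
/* Illustrative cases this handles (a sketch, assuming int and unsigned
   int share a vector mode on the target):

       b_1 = a_2;                   // plain copy
       c_3 = (unsigned int) d_4;    // conversion, same lanes and size

   Both become a single vector copy; when the vector types differ only
   in signedness the operand is wrapped in a VIEW_CONVERT_EXPR first. */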
4655
4656static bool
355fe088
TS
4657vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4658 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4659{
4660 tree vec_dest;
4661 tree scalar_dest;
4662 tree op;
4663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
4664 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4665 tree new_temp;
355fe088 4666 gimple *def_stmt;
4fc5ebf1
JG
4667 enum vect_def_type dt[1] = {vect_unknown_def_type};
4668 int ndts = 1;
ebfd146a 4669 int ncopies;
f18b55bd 4670 int i, j;
6e1aa848 4671 vec<tree> vec_oprnds = vNULL;
ebfd146a 4672 tree vop;
a70d6342 4673 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4674 vec_info *vinfo = stmt_info->vinfo;
355fe088 4675 gimple *new_stmt = NULL;
f18b55bd 4676 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4677 enum tree_code code;
4678 tree vectype_in;
ebfd146a 4679
a70d6342 4680 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4681 return false;
4682
66c16fd9
RB
4683 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4684 && ! vec_stmt)
ebfd146a
IR
4685 return false;
4686
4687 /* Is vectorizable assignment? */
4688 if (!is_gimple_assign (stmt))
4689 return false;
4690
4691 scalar_dest = gimple_assign_lhs (stmt);
4692 if (TREE_CODE (scalar_dest) != SSA_NAME)
4693 return false;
4694
fde9c428 4695 code = gimple_assign_rhs_code (stmt);
ebfd146a 4696 if (gimple_assign_single_p (stmt)
fde9c428
RG
4697 || code == PAREN_EXPR
4698 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4699 op = gimple_assign_rhs1 (stmt);
4700 else
4701 return false;
4702
7b7ec6c5
RG
4703 if (code == VIEW_CONVERT_EXPR)
4704 op = TREE_OPERAND (op, 0);
4705
465c8c19
JJ
4706 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4707 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4708
4709 /* Multiple types in SLP are handled by creating the appropriate number of
4710 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4711 case of SLP. */
fce57248 4712 if (slp_node)
465c8c19
JJ
4713 ncopies = 1;
4714 else
e8f142e2 4715 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
4716
4717 gcc_assert (ncopies >= 1);
4718
81c40241 4719 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4720 {
73fbfcad 4721 if (dump_enabled_p ())
78c60e3d 4722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4723 "use not simple.\n");
ebfd146a
IR
4724 return false;
4725 }
4726
fde9c428
RG
4727 /* We can handle NOP_EXPR conversions that do not change the number
4728 of elements or the vector size. */
7b7ec6c5
RG
4729 if ((CONVERT_EXPR_CODE_P (code)
4730 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
4731 && (!vectype_in
4732 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4733 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4734 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4735 return false;
4736
7b7b1813
RG
4737 /* We do not handle bit-precision changes. */
4738 if ((CONVERT_EXPR_CODE_P (code)
4739 || code == VIEW_CONVERT_EXPR)
4740 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
4741 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4742 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
4743 /* But a conversion that does not change the bit-pattern is ok. */
4744 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4745 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
4746 && TYPE_UNSIGNED (TREE_TYPE (op)))
4747 /* Conversion between boolean types of different sizes is
4748 a simple assignment in case their vectypes are the same
4749 boolean vectors. */
4750 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4751 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4752 {
73fbfcad 4753 if (dump_enabled_p ())
78c60e3d
SS
4754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4755 "type conversion to/from bit-precision "
e645e942 4756 "unsupported.\n");
7b7b1813
RG
4757 return false;
4758 }
4759
ebfd146a
IR
4760 if (!vec_stmt) /* transformation not required. */
4761 {
4762 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4763 if (dump_enabled_p ())
78c60e3d 4764 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4765 "=== vectorizable_assignment ===\n");
4fc5ebf1 4766 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
4767 return true;
4768 }
4769
67b8dbac 4770 /* Transform. */
73fbfcad 4771 if (dump_enabled_p ())
e645e942 4772 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4773
4774 /* Handle def. */
4775 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4776
4777 /* Handle use. */
f18b55bd 4778 for (j = 0; j < ncopies; j++)
ebfd146a 4779 {
f18b55bd
IR
4780 /* Handle uses. */
4781 if (j == 0)
306b0c92 4782 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
4783 else
4784 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4785
 4786 /* Arguments are ready. Create the new vector stmt. */
9771b263 4787 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4788 {
7b7ec6c5
RG
4789 if (CONVERT_EXPR_CODE_P (code)
4790 || code == VIEW_CONVERT_EXPR)
4a73490d 4791 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4792 new_stmt = gimple_build_assign (vec_dest, vop);
4793 new_temp = make_ssa_name (vec_dest, new_stmt);
4794 gimple_assign_set_lhs (new_stmt, new_temp);
4795 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4796 if (slp_node)
9771b263 4797 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4798 }
ebfd146a
IR
4799
4800 if (slp_node)
f18b55bd
IR
4801 continue;
4802
4803 if (j == 0)
4804 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4805 else
4806 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4807
4808 prev_stmt_info = vinfo_for_stmt (new_stmt);
4809 }
b8698a0f 4810
9771b263 4811 vec_oprnds.release ();
ebfd146a
IR
4812 return true;
4813}
4814
9dc3f7de 4815
1107f3ae
IR
4816/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4817 either as shift by a scalar or by a vector. */
4818
4819bool
4820vect_supportable_shift (enum tree_code code, tree scalar_type)
4821{
4822
ef4bddc2 4823 machine_mode vec_mode;
1107f3ae
IR
4824 optab optab;
4825 int icode;
4826 tree vectype;
4827
4828 vectype = get_vectype_for_scalar_type (scalar_type);
4829 if (!vectype)
4830 return false;
4831
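 /* Prefer the vector-shifted-by-scalar optab; e.g. (illustrative)
 a target may provide only a V4SI << V4SI pattern, in which case
 the optab_vector lookup below is the one that succeeds. */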
4832 optab = optab_for_tree_code (code, vectype, optab_scalar);
4833 if (!optab
4834 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4835 {
4836 optab = optab_for_tree_code (code, vectype, optab_vector);
4837 if (!optab
4838 || (optab_handler (optab, TYPE_MODE (vectype))
4839 == CODE_FOR_nothing))
4840 return false;
4841 }
4842
4843 vec_mode = TYPE_MODE (vectype);
4844 icode = (int) optab_handler (optab, vec_mode);
4845 if (icode == CODE_FOR_nothing)
4846 return false;
4847
4848 return true;
4849}
4850
4851
9dc3f7de
IR
4852/* Function vectorizable_shift.
4853
4854 Check if STMT performs a shift operation that can be vectorized.
4855 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4856 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4857 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4858
4859static bool
355fe088
TS
4860vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4861 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
4862{
4863 tree vec_dest;
4864 tree scalar_dest;
4865 tree op0, op1 = NULL;
4866 tree vec_oprnd1 = NULL_TREE;
4867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4868 tree vectype;
4869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4870 enum tree_code code;
ef4bddc2 4871 machine_mode vec_mode;
9dc3f7de
IR
4872 tree new_temp;
4873 optab optab;
4874 int icode;
ef4bddc2 4875 machine_mode optab_op2_mode;
355fe088 4876 gimple *def_stmt;
9dc3f7de 4877 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4878 int ndts = 2;
355fe088 4879 gimple *new_stmt = NULL;
9dc3f7de
IR
4880 stmt_vec_info prev_stmt_info;
4881 int nunits_in;
4882 int nunits_out;
4883 tree vectype_out;
cede2577 4884 tree op1_vectype;
9dc3f7de
IR
4885 int ncopies;
4886 int j, i;
6e1aa848
DN
4887 vec<tree> vec_oprnds0 = vNULL;
4888 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4889 tree vop0, vop1;
4890 unsigned int k;
49eab32e 4891 bool scalar_shift_arg = true;
9dc3f7de 4892 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4893 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
4894
4895 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4896 return false;
4897
66c16fd9
RB
4898 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4899 && ! vec_stmt)
9dc3f7de
IR
4900 return false;
4901
4902 /* Is STMT a vectorizable binary/unary operation? */
4903 if (!is_gimple_assign (stmt))
4904 return false;
4905
4906 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4907 return false;
4908
4909 code = gimple_assign_rhs_code (stmt);
4910
4911 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4912 || code == RROTATE_EXPR))
4913 return false;
4914
4915 scalar_dest = gimple_assign_lhs (stmt);
4916 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 4917 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 4918 {
73fbfcad 4919 if (dump_enabled_p ())
78c60e3d 4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4921 "bit-precision shifts not supported.\n");
7b7b1813
RG
4922 return false;
4923 }
9dc3f7de
IR
4924
4925 op0 = gimple_assign_rhs1 (stmt);
81c40241 4926 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4927 {
73fbfcad 4928 if (dump_enabled_p ())
78c60e3d 4929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4930 "use not simple.\n");
9dc3f7de
IR
4931 return false;
4932 }
 4933 /* If op0 is an external or constant def, use a vector type with
 4934 the same size as the output vector type. */
4935 if (!vectype)
4936 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4937 if (vec_stmt)
4938 gcc_assert (vectype);
4939 if (!vectype)
4940 {
73fbfcad 4941 if (dump_enabled_p ())
78c60e3d 4942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4943 "no vectype for scalar type\n");
9dc3f7de
IR
4944 return false;
4945 }
4946
4947 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4948 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4949 if (nunits_out != nunits_in)
4950 return false;
4951
4952 op1 = gimple_assign_rhs2 (stmt);
81c40241 4953 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 4954 {
73fbfcad 4955 if (dump_enabled_p ())
78c60e3d 4956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4957 "use not simple.\n");
9dc3f7de
IR
4958 return false;
4959 }
4960
9dc3f7de
IR
4961 /* Multiple types in SLP are handled by creating the appropriate number of
4962 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4963 case of SLP. */
fce57248 4964 if (slp_node)
9dc3f7de
IR
4965 ncopies = 1;
4966 else
e8f142e2 4967 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
4968
4969 gcc_assert (ncopies >= 1);
4970
 4971 /* Determine whether the shift amount is a vector or a scalar. If the
 4972 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4973
dbfa87aa
YR
4974 if ((dt[1] == vect_internal_def
4975 || dt[1] == vect_induction_def)
4976 && !slp_node)
49eab32e
JJ
4977 scalar_shift_arg = false;
4978 else if (dt[1] == vect_constant_def
4979 || dt[1] == vect_external_def
4980 || dt[1] == vect_internal_def)
4981 {
 4982 /* In SLP we need to check whether the shift count is the same
 4983 for all stmts; in loops, if it is a constant or invariant, it is
 4984 always a scalar shift. */
4985 if (slp_node)
4986 {
355fe088
TS
4987 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4988 gimple *slpstmt;
49eab32e 4989
9771b263 4990 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
4991 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4992 scalar_shift_arg = false;
4993 }
60d393e8
RB
4994
 4995 /* If the shift amount is computed by a pattern stmt we cannot
 4996 use the scalar amount directly, so give up and use a vector
 4997 shift. */
4998 if (dt[1] == vect_internal_def)
4999 {
5000 gimple *def = SSA_NAME_DEF_STMT (op1);
5001 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5002 scalar_shift_arg = false;
5003 }
49eab32e
JJ
5004 }
5005 else
5006 {
73fbfcad 5007 if (dump_enabled_p ())
78c60e3d 5008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5009 "operand mode requires invariant argument.\n");
49eab32e
JJ
5010 return false;
5011 }
5012
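 /* E.g. (illustrative): in a[i] = b[i] << s the shift amount is a
 loop-invariant scalar and scalar_shift_arg stays true, while in
 a[i] = b[i] << c[i] the amount is a vector and it is cleared. */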
9dc3f7de 5013 /* Vector shifted by vector. */
49eab32e 5014 if (!scalar_shift_arg)
9dc3f7de
IR
5015 {
5016 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5017 if (dump_enabled_p ())
78c60e3d 5018 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5019 "vector/vector shift/rotate found.\n");
78c60e3d 5020
aa948027
JJ
5021 if (!op1_vectype)
5022 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5023 if (op1_vectype == NULL_TREE
5024 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5025 {
73fbfcad 5026 if (dump_enabled_p ())
78c60e3d
SS
5027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5028 "unusable type for last operand in"
e645e942 5029 " vector/vector shift/rotate.\n");
cede2577
JJ
5030 return false;
5031 }
9dc3f7de
IR
5032 }
 5033 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
 5034 whether it has a vector-shifted-by-vector insn. */
49eab32e 5035 else
9dc3f7de
IR
5036 {
5037 optab = optab_for_tree_code (code, vectype, optab_scalar);
5038 if (optab
5039 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5040 {
73fbfcad 5041 if (dump_enabled_p ())
78c60e3d 5042 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5043 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5044 }
5045 else
5046 {
5047 optab = optab_for_tree_code (code, vectype, optab_vector);
5048 if (optab
5049 && (optab_handler (optab, TYPE_MODE (vectype))
5050 != CODE_FOR_nothing))
5051 {
49eab32e
JJ
5052 scalar_shift_arg = false;
5053
73fbfcad 5054 if (dump_enabled_p ())
78c60e3d 5055 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5056 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5057
 5058 /* Unlike the other binary operators, shifts/rotates have
 5059 an int rhs rather than one of the same type as the lhs,
 5060 so make sure the scalar is of the right type if we are
aa948027 5061 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5062 if (dt[1] == vect_constant_def)
5063 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5064 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5065 TREE_TYPE (op1)))
5066 {
5067 if (slp_node
5068 && TYPE_MODE (TREE_TYPE (vectype))
5069 != TYPE_MODE (TREE_TYPE (op1)))
5070 {
73fbfcad 5071 if (dump_enabled_p ())
78c60e3d
SS
5072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5073 "unusable type for last operand in"
e645e942 5074 " vector/vector shift/rotate.\n");
21c0a521 5075 return false;
aa948027
JJ
5076 }
5077 if (vec_stmt && !slp_node)
5078 {
5079 op1 = fold_convert (TREE_TYPE (vectype), op1);
5080 op1 = vect_init_vector (stmt, op1,
5081 TREE_TYPE (vectype), NULL);
5082 }
5083 }
9dc3f7de
IR
5084 }
5085 }
5086 }
9dc3f7de
IR
5087
5088 /* Supportable by target? */
5089 if (!optab)
5090 {
73fbfcad 5091 if (dump_enabled_p ())
78c60e3d 5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5093 "no optab.\n");
9dc3f7de
IR
5094 return false;
5095 }
5096 vec_mode = TYPE_MODE (vectype);
5097 icode = (int) optab_handler (optab, vec_mode);
5098 if (icode == CODE_FOR_nothing)
5099 {
73fbfcad 5100 if (dump_enabled_p ())
78c60e3d 5101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5102 "op not supported by target.\n");
9dc3f7de
IR
5103 /* Check only during analysis. */
5104 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb
RS
5105 || (!vec_stmt
5106 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5107 return false;
73fbfcad 5108 if (dump_enabled_p ())
e645e942
TJ
5109 dump_printf_loc (MSG_NOTE, vect_location,
5110 "proceeding using word mode.\n");
9dc3f7de
IR
5111 }
5112
5113 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5114 if (!vec_stmt
5115 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5116 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5117 {
73fbfcad 5118 if (dump_enabled_p ())
78c60e3d 5119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5120 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5121 return false;
5122 }
5123
5124 if (!vec_stmt) /* transformation not required. */
5125 {
5126 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5127 if (dump_enabled_p ())
e645e942
TJ
5128 dump_printf_loc (MSG_NOTE, vect_location,
5129 "=== vectorizable_shift ===\n");
4fc5ebf1 5130 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5131 return true;
5132 }
5133
67b8dbac 5134 /* Transform. */
9dc3f7de 5135
73fbfcad 5136 if (dump_enabled_p ())
78c60e3d 5137 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5138 "transform binary/unary operation.\n");
9dc3f7de
IR
5139
5140 /* Handle def. */
5141 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5142
9dc3f7de
IR
5143 prev_stmt_info = NULL;
5144 for (j = 0; j < ncopies; j++)
5145 {
5146 /* Handle uses. */
5147 if (j == 0)
5148 {
5149 if (scalar_shift_arg)
5150 {
5151 /* Vector shl and shr insn patterns can be defined with scalar
5152 operand 2 (shift operand). In this case, use constant or loop
5153 invariant op1 directly, without extending it to vector mode
5154 first. */
5155 optab_op2_mode = insn_data[icode].operand[2].mode;
5156 if (!VECTOR_MODE_P (optab_op2_mode))
5157 {
73fbfcad 5158 if (dump_enabled_p ())
78c60e3d 5159 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5160 "operand 1 using scalar mode.\n");
9dc3f7de 5161 vec_oprnd1 = op1;
8930f723 5162 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5163 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5164 if (slp_node)
5165 {
5166 /* Store vec_oprnd1 for every vector stmt to be created
5167 for SLP_NODE. We check during the analysis that all
5168 the shift arguments are the same.
5169 TODO: Allow different constants for different vector
5170 stmts generated for an SLP instance. */
5171 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5172 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5173 }
5174 }
5175 }
5176
 5177 /* vec_oprnd1 is available if operand 1 should be of a scalar type
 5178 (a special case for certain kinds of vector shifts); otherwise,
 5179 operand 1 should be of a vector type (the usual case). */
5180 if (vec_oprnd1)
5181 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5182 slp_node);
9dc3f7de
IR
5183 else
5184 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5185 slp_node);
9dc3f7de
IR
5186 }
5187 else
5188 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5189
5190 /* Arguments are ready. Create the new vector stmt. */
9771b263 5191 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5192 {
9771b263 5193 vop1 = vec_oprnds1[i];
0d0e4a03 5194 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5195 new_temp = make_ssa_name (vec_dest, new_stmt);
5196 gimple_assign_set_lhs (new_stmt, new_temp);
5197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5198 if (slp_node)
9771b263 5199 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5200 }
5201
5202 if (slp_node)
5203 continue;
5204
5205 if (j == 0)
5206 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5207 else
5208 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5209 prev_stmt_info = vinfo_for_stmt (new_stmt);
5210 }
5211
9771b263
DN
5212 vec_oprnds0.release ();
5213 vec_oprnds1.release ();
9dc3f7de
IR
5214
5215 return true;
5216}
5217
5218
ebfd146a
IR
5219/* Function vectorizable_operation.
5220
16949072
RG
5221 Check if STMT performs a binary, unary or ternary operation that can
5222 be vectorized.
b8698a0f 5223 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5224 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5225 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5226
5227static bool
355fe088
TS
5228vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5229 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5230{
00f07b86 5231 tree vec_dest;
ebfd146a 5232 tree scalar_dest;
16949072 5233 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5235 tree vectype;
ebfd146a 5236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5237 enum tree_code code, orig_code;
ef4bddc2 5238 machine_mode vec_mode;
ebfd146a
IR
5239 tree new_temp;
5240 int op_type;
00f07b86 5241 optab optab;
523ba738 5242 bool target_support_p;
355fe088 5243 gimple *def_stmt;
16949072
RG
5244 enum vect_def_type dt[3]
5245 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5246 int ndts = 3;
355fe088 5247 gimple *new_stmt = NULL;
ebfd146a 5248 stmt_vec_info prev_stmt_info;
b690cc0f 5249 int nunits_in;
ebfd146a
IR
5250 int nunits_out;
5251 tree vectype_out;
5252 int ncopies;
5253 int j, i;
6e1aa848
DN
5254 vec<tree> vec_oprnds0 = vNULL;
5255 vec<tree> vec_oprnds1 = vNULL;
5256 vec<tree> vec_oprnds2 = vNULL;
16949072 5257 tree vop0, vop1, vop2;
a70d6342 5258 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5259 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5260
a70d6342 5261 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5262 return false;
5263
66c16fd9
RB
5264 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5265 && ! vec_stmt)
ebfd146a
IR
5266 return false;
5267
 5268 /* Is STMT a vectorizable unary/binary/ternary operation? */
5269 if (!is_gimple_assign (stmt))
5270 return false;
5271
5272 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5273 return false;
5274
0eb952ea 5275 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5276
1af4ebf5
MG
5277 /* For pointer addition and subtraction, we should use the normal
5278 plus and minus for the vector operation. */
ebfd146a
IR
5279 if (code == POINTER_PLUS_EXPR)
5280 code = PLUS_EXPR;
1af4ebf5
MG
5281 if (code == POINTER_DIFF_EXPR)
5282 code = MINUS_EXPR;
ebfd146a
IR
5283
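 /* E.g. (illustrative): q_2 = p_1 + 4 (POINTER_PLUS_EXPR) is
 vectorized as a PLUS_EXPR on vectors of unsigned elements, and
 d_3 = p_1 - q_2 (POINTER_DIFF_EXPR) as a MINUS_EXPR whose result
 is view-converted back to the signed vectype during the
 transform below. */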
 5284 /* Support only unary, binary and ternary operations. */
5285 op_type = TREE_CODE_LENGTH (code);
16949072 5286 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5287 {
73fbfcad 5288 if (dump_enabled_p ())
78c60e3d 5289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5290 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5291 op_type);
ebfd146a
IR
5292 return false;
5293 }
5294
b690cc0f
RG
5295 scalar_dest = gimple_assign_lhs (stmt);
5296 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5297
7b7b1813
RG
5298 /* Most operations cannot handle bit-precision types without extra
5299 truncations. */
045c1278 5300 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5301 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
 5302 /* The exceptions are bitwise binary operations. */
5303 && code != BIT_IOR_EXPR
5304 && code != BIT_XOR_EXPR
5305 && code != BIT_AND_EXPR)
5306 {
73fbfcad 5307 if (dump_enabled_p ())
78c60e3d 5308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5309 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5310 return false;
5311 }
5312
ebfd146a 5313 op0 = gimple_assign_rhs1 (stmt);
81c40241 5314 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5315 {
73fbfcad 5316 if (dump_enabled_p ())
78c60e3d 5317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5318 "use not simple.\n");
ebfd146a
IR
5319 return false;
5320 }
b690cc0f
RG
 5321 /* If op0 is an external or constant def, use a vector type with
 5322 the same size as the output vector type. */
5323 if (!vectype)
b036c6c5
IE
5324 {
 5325 /* For a boolean type we cannot determine the vectype from an
 5326 invariant value (we don't know whether it is a vector
 5327 of booleans or a vector of integers). We use the output
 5328 vectype because operations on booleans don't change
 5329 the type. */
2568d8a1 5330 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5331 {
2568d8a1 5332 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5333 {
5334 if (dump_enabled_p ())
5335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5336 "not supported operation on bool value.\n");
5337 return false;
5338 }
5339 vectype = vectype_out;
5340 }
5341 else
5342 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5343 }
7d8930a0
IR
5344 if (vec_stmt)
5345 gcc_assert (vectype);
5346 if (!vectype)
5347 {
73fbfcad 5348 if (dump_enabled_p ())
7d8930a0 5349 {
78c60e3d
SS
5350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5351 "no vectype for scalar type ");
5352 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5353 TREE_TYPE (op0));
e645e942 5354 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5355 }
5356
5357 return false;
5358 }
b690cc0f
RG
5359
5360 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5361 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5362 if (nunits_out != nunits_in)
5363 return false;
ebfd146a 5364
16949072 5365 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5366 {
5367 op1 = gimple_assign_rhs2 (stmt);
81c40241 5368 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5369 {
73fbfcad 5370 if (dump_enabled_p ())
78c60e3d 5371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5372 "use not simple.\n");
ebfd146a
IR
5373 return false;
5374 }
5375 }
16949072
RG
5376 if (op_type == ternary_op)
5377 {
5378 op2 = gimple_assign_rhs3 (stmt);
81c40241 5379 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5380 {
73fbfcad 5381 if (dump_enabled_p ())
78c60e3d 5382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5383 "use not simple.\n");
16949072
RG
5384 return false;
5385 }
5386 }
ebfd146a 5387
b690cc0f 5388 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5389 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5390 case of SLP. */
fce57248 5391 if (slp_node)
b690cc0f
RG
5392 ncopies = 1;
5393 else
e8f142e2 5394 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5395
5396 gcc_assert (ncopies >= 1);
5397
9dc3f7de 5398 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5399 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5400 || code == RROTATE_EXPR)
9dc3f7de 5401 return false;
ebfd146a 5402
ebfd146a 5403 /* Supportable by target? */
00f07b86
RH
5404
5405 vec_mode = TYPE_MODE (vectype);
5406 if (code == MULT_HIGHPART_EXPR)
523ba738 5407 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5408 else
5409 {
5410 optab = optab_for_tree_code (code, vectype, optab_default);
5411 if (!optab)
5deb57cb 5412 {
73fbfcad 5413 if (dump_enabled_p ())
78c60e3d 5414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5415 "no optab.\n");
00f07b86 5416 return false;
5deb57cb 5417 }
523ba738
RS
5418 target_support_p = (optab_handler (optab, vec_mode)
5419 != CODE_FOR_nothing);
5deb57cb
JJ
5420 }
5421
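 /* E.g. (illustrative): PLUS_EXPR on V4SI is supported when
 optab_handler (add_optab, V4SImode) != CODE_FOR_nothing, i.e.
 when the target defines an addv4si3 pattern. */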
523ba738 5422 if (!target_support_p)
ebfd146a 5423 {
73fbfcad 5424 if (dump_enabled_p ())
78c60e3d 5425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5426 "op not supported by target.\n");
ebfd146a
IR
5427 /* Check only during analysis. */
5428 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb 5429 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5430 return false;
73fbfcad 5431 if (dump_enabled_p ())
e645e942
TJ
5432 dump_printf_loc (MSG_NOTE, vect_location,
5433 "proceeding using word mode.\n");
383d9c83
IR
5434 }
5435
4a00c761 5436 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5437 if (!VECTOR_MODE_P (vec_mode)
5438 && !vec_stmt
ca09abcb 5439 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5440 {
73fbfcad 5441 if (dump_enabled_p ())
78c60e3d 5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5443 "not worthwhile without SIMD support.\n");
e34842c6 5444 return false;
7d8930a0 5445 }
ebfd146a 5446
ebfd146a
IR
5447 if (!vec_stmt) /* transformation not required. */
5448 {
4a00c761 5449 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5450 if (dump_enabled_p ())
78c60e3d 5451 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5452 "=== vectorizable_operation ===\n");
4fc5ebf1 5453 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5454 return true;
5455 }
5456
67b8dbac 5457 /* Transform. */
ebfd146a 5458
73fbfcad 5459 if (dump_enabled_p ())
78c60e3d 5460 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5461 "transform binary/unary operation.\n");
383d9c83 5462
ebfd146a 5463 /* Handle def. */
00f07b86 5464 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5465
0eb952ea
JJ
 5466 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
 5467 vectors with unsigned elements, but the result is signed. So, we
 5468 need to compute the MINUS_EXPR into a vectype temporary and
 5469 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5470 tree vec_cvt_dest = NULL_TREE;
5471 if (orig_code == POINTER_DIFF_EXPR)
5472 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5473
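 /* E.g. (illustrative) for ptrdiff_t d = p - q with two-lane vectors:
 vect_tmp = MINUS_EXPR <vect_p, vect_q>; <-- unsigned VECTYPE
 vect_d = VIEW_CONVERT_EXPR<VECTYPE_OUT> (vect_tmp); <-- signed */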
ebfd146a
IR
5474 /* In case the vectorization factor (VF) is bigger than the number
5475 of elements that we can fit in a vectype (nunits), we have to generate
 5476 more than one vector stmt, i.e. we need to "unroll" the
4a00c761
JJ
5477 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5478 from one copy of the vector stmt to the next, in the field
5479 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5480 stages to find the correct vector defs to be used when vectorizing
5481 stmts that use the defs of the current stmt. The example below
5482 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5483 we need to create 4 vectorized stmts):
5484
5485 before vectorization:
5486 RELATED_STMT VEC_STMT
5487 S1: x = memref - -
5488 S2: z = x + 1 - -
5489
5490 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5491 there):
5492 RELATED_STMT VEC_STMT
5493 VS1_0: vx0 = memref0 VS1_1 -
5494 VS1_1: vx1 = memref1 VS1_2 -
5495 VS1_2: vx2 = memref2 VS1_3 -
5496 VS1_3: vx3 = memref3 - -
5497 S1: x = load - VS1_0
5498 S2: z = x + 1 - -
5499
5500 step2: vectorize stmt S2 (done here):
5501 To vectorize stmt S2 we first need to find the relevant vector
5502 def for the first operand 'x'. This is, as usual, obtained from
5503 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5504 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5505 relevant vector def 'vx0'. Having found 'vx0' we can generate
5506 the vector stmt VS2_0, and as usual, record it in the
5507 STMT_VINFO_VEC_STMT of stmt S2.
5508 When creating the second copy (VS2_1), we obtain the relevant vector
5509 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5510 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5511 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5512 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5513 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5514 chain of stmts and pointers:
5515 RELATED_STMT VEC_STMT
5516 VS1_0: vx0 = memref0 VS1_1 -
5517 VS1_1: vx1 = memref1 VS1_2 -
5518 VS1_2: vx2 = memref2 VS1_3 -
5519 VS1_3: vx3 = memref3 - -
5520 S1: x = load - VS1_0
5521 VS2_0: vz0 = vx0 + v1 VS2_1 -
5522 VS2_1: vz1 = vx1 + v1 VS2_2 -
5523 VS2_2: vz2 = vx2 + v1 VS2_3 -
5524 VS2_3: vz3 = vx3 + v1 - -
5525 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5526
5527 prev_stmt_info = NULL;
5528 for (j = 0; j < ncopies; j++)
5529 {
5530 /* Handle uses. */
5531 if (j == 0)
4a00c761
JJ
5532 {
5533 if (op_type == binary_op || op_type == ternary_op)
5534 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5535 slp_node);
4a00c761
JJ
5536 else
5537 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5538 slp_node);
4a00c761 5539 if (op_type == ternary_op)
c392943c 5540 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5541 slp_node);
4a00c761 5542 }
ebfd146a 5543 else
4a00c761
JJ
5544 {
5545 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5546 if (op_type == ternary_op)
5547 {
9771b263
DN
5548 tree vec_oprnd = vec_oprnds2.pop ();
5549 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5550 vec_oprnd));
4a00c761
JJ
5551 }
5552 }
5553
5554 /* Arguments are ready. Create the new vector stmt. */
9771b263 5555 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5556 {
4a00c761 5557 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5558 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5559 vop2 = ((op_type == ternary_op)
9771b263 5560 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5561 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5562 new_temp = make_ssa_name (vec_dest, new_stmt);
5563 gimple_assign_set_lhs (new_stmt, new_temp);
5564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5565 if (vec_cvt_dest)
5566 {
5567 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5568 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5569 new_temp);
5570 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5571 gimple_assign_set_lhs (new_stmt, new_temp);
5572 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5573 }
4a00c761 5574 if (slp_node)
9771b263 5575 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5576 }
5577
4a00c761
JJ
5578 if (slp_node)
5579 continue;
5580
5581 if (j == 0)
5582 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5583 else
5584 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5585 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5586 }
5587
9771b263
DN
5588 vec_oprnds0.release ();
5589 vec_oprnds1.release ();
5590 vec_oprnds2.release ();
ebfd146a 5591
ebfd146a
IR
5592 return true;
5593}
5594
f702e7d4 5595/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5596
5597static void
f702e7d4 5598ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5599{
5600 if (!dr->aux)
5601 return;
5602
52639a61 5603 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5604 {
52639a61 5605 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5606
f702e7d4
RS
5607 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5608
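 /* E.g. (illustrative): for a 16-byte DR_TARGET_ALIGNMENT this
 raises the base decl's alignment to 128 bits, going through the
 symtab node for global symbols and setting DECL_ALIGN directly
 for local decls. */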
428f0c67 5609 if (decl_in_symtab_p (base_decl))
f702e7d4 5610 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
5611 else
5612 {
f702e7d4 5613 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
5614 DECL_USER_ALIGN (base_decl) = 1;
5615 }
52639a61 5616 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5617 }
5618}
5619
ebfd146a 5620
44fc7854
BE
5621/* Function get_group_alias_ptr_type.
5622
5623 Return the alias type for the group starting at FIRST_STMT. */
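/* E.g. (illustrative): if one access in the group is through an
 int * and another through a float *, their alias sets conflict and
 ptr_type_node is returned, allowing the refs to alias anything. */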
5624
5625static tree
5626get_group_alias_ptr_type (gimple *first_stmt)
5627{
5628 struct data_reference *first_dr, *next_dr;
5629 gimple *next_stmt;
5630
5631 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5632 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5633 while (next_stmt)
5634 {
5635 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5636 if (get_alias_set (DR_REF (first_dr))
5637 != get_alias_set (DR_REF (next_dr)))
5638 {
5639 if (dump_enabled_p ())
5640 dump_printf_loc (MSG_NOTE, vect_location,
5641 "conflicting alias set types.\n");
5642 return ptr_type_node;
5643 }
5644 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5645 }
5646 return reference_alias_ptr_type (DR_REF (first_dr));
5647}
5648
5649
ebfd146a
IR
5650/* Function vectorizable_store.
5651
b8698a0f
L
5652 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5653 can be vectorized.
5654 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5655 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5656 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5657
5658static bool
355fe088 5659vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5660 slp_tree slp_node)
ebfd146a
IR
5661{
5662 tree scalar_dest;
5663 tree data_ref;
5664 tree op;
5665 tree vec_oprnd = NULL_TREE;
5666 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5667 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5668 tree elem_type;
ebfd146a 5669 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5670 struct loop *loop = NULL;
ef4bddc2 5671 machine_mode vec_mode;
ebfd146a
IR
5672 tree dummy;
5673 enum dr_alignment_support alignment_support_scheme;
355fe088 5674 gimple *def_stmt;
ebfd146a
IR
5675 enum vect_def_type dt;
5676 stmt_vec_info prev_stmt_info = NULL;
5677 tree dataref_ptr = NULL_TREE;
74bf76ed 5678 tree dataref_offset = NULL_TREE;
355fe088 5679 gimple *ptr_incr = NULL;
ebfd146a
IR
5680 int ncopies;
5681 int j;
2de001ee
RS
5682 gimple *next_stmt, *first_stmt;
5683 bool grouped_store;
ebfd146a 5684 unsigned int group_size, i;
6e1aa848
DN
5685 vec<tree> oprnds = vNULL;
5686 vec<tree> result_chain = vNULL;
ebfd146a 5687 bool inv_p;
09dfa495 5688 tree offset = NULL_TREE;
6e1aa848 5689 vec<tree> vec_oprnds = vNULL;
ebfd146a 5690 bool slp = (slp_node != NULL);
ebfd146a 5691 unsigned int vec_num;
a70d6342 5692 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5693 vec_info *vinfo = stmt_info->vinfo;
272c6793 5694 tree aggr_type;
134c85ca 5695 gather_scatter_info gs_info;
3bab6342 5696 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5697 gimple *new_stmt;
b17dc4d4 5698 int vf;
2de001ee 5699 vec_load_store_type vls_type;
44fc7854 5700 tree ref_type;
a70d6342 5701
a70d6342 5702 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5703 return false;
5704
66c16fd9
RB
5705 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5706 && ! vec_stmt)
ebfd146a
IR
5707 return false;
5708
5709 /* Is vectorizable store? */
5710
5711 if (!is_gimple_assign (stmt))
5712 return false;
5713
5714 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5715 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5716 && is_pattern_stmt_p (stmt_info))
5717 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5718 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5719 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5720 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5721 && TREE_CODE (scalar_dest) != COMPONENT_REF
5722 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5723 && TREE_CODE (scalar_dest) != REALPART_EXPR
5724 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5725 return false;
5726
fce57248
RS
5727 /* Cannot have hybrid store SLP -- that would mean storing to the
5728 same location twice. */
5729 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5730
ebfd146a 5731 gcc_assert (gimple_assign_single_p (stmt));
465c8c19 5732
f4d09712 5733 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
465c8c19
JJ
5734 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5735
5736 if (loop_vinfo)
b17dc4d4
RB
5737 {
5738 loop = LOOP_VINFO_LOOP (loop_vinfo);
5739 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5740 }
5741 else
5742 vf = 1;
465c8c19
JJ
5743
5744 /* Multiple types in SLP are handled by creating the appropriate number of
5745 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5746 case of SLP. */
fce57248 5747 if (slp)
465c8c19
JJ
5748 ncopies = 1;
5749 else
e8f142e2 5750 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5751
5752 gcc_assert (ncopies >= 1);
5753
5754 /* FORNOW. This restriction should be relaxed. */
5755 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5756 {
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5759 "multiple types in nested loop.\n");
5760 return false;
5761 }
5762
ebfd146a 5763 op = gimple_assign_rhs1 (stmt);
f4d09712 5764
2f391428 5765 /* In case this is a store from a constant, make sure
11a82e25 5766 native_encode_expr can handle it. */
2f391428 5767 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
11a82e25
RB
5768 return false;
5769
f4d09712 5770 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
ebfd146a 5771 {
73fbfcad 5772 if (dump_enabled_p ())
78c60e3d 5773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5774 "use not simple.\n");
ebfd146a
IR
5775 return false;
5776 }
5777
2de001ee
RS
5778 if (dt == vect_constant_def || dt == vect_external_def)
5779 vls_type = VLS_STORE_INVARIANT;
5780 else
5781 vls_type = VLS_STORE;
5782
f4d09712
KY
5783 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5784 return false;
5785
272c6793 5786 elem_type = TREE_TYPE (vectype);
ebfd146a 5787 vec_mode = TYPE_MODE (vectype);
7b7b1813 5788
ebfd146a
IR
5789 /* FORNOW. In some cases can vectorize even if data-type not supported
5790 (e.g. - array initialization with 0). */
947131ba 5791 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5792 return false;
5793
5794 if (!STMT_VINFO_DATA_REF (stmt_info))
5795 return false;
5796
2de001ee 5797 vect_memory_access_type memory_access_type;
62da9e14 5798 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
2de001ee
RS
5799 &memory_access_type, &gs_info))
5800 return false;
3bab6342 5801
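 /* E.g. (illustrative): a unit-stride store classifies as
 VMAT_CONTIGUOUS, a constant non-unit stride as VMAT_ELEMENTWISE or
 VMAT_STRIDED_SLP, and an indexed store as VMAT_GATHER_SCATTER;
 each classification takes a separate path below. */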
ebfd146a
IR
5802 if (!vec_stmt) /* transformation not required. */
5803 {
2de001ee 5804 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 5805 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5806 /* The SLP costs are calculated during SLP analysis. */
5807 if (!PURE_SLP_STMT (stmt_info))
2de001ee 5808 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
2e8ab70c 5809 NULL, NULL, NULL);
ebfd146a
IR
5810 return true;
5811 }
2de001ee 5812 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 5813
67b8dbac 5814 /* Transform. */
ebfd146a 5815
f702e7d4 5816 ensure_base_align (dr);
c716e67f 5817
2de001ee 5818 if (memory_access_type == VMAT_GATHER_SCATTER)
3bab6342
AT
5819 {
5820 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
134c85ca 5821 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
5822 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5823 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5824 edge pe = loop_preheader_edge (loop);
5825 gimple_seq seq;
5826 basic_block new_bb;
5827 enum { NARROW, NONE, WIDEN } modifier;
134c85ca 5828 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342
AT
5829
5830 if (nunits == (unsigned int) scatter_off_nunits)
5831 modifier = NONE;
5832 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5833 {
3bab6342
AT
5834 modifier = WIDEN;
5835
e3342de4 5836 vec_perm_builder sel (scatter_off_nunits, scatter_off_nunits, 1);
3bab6342 5837 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
908a1a16 5838 sel.quick_push (i | nunits);
3bab6342 5839
e3342de4
RS
5840 vec_perm_indices indices (sel, 1, scatter_off_nunits);
5841 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5842 indices);
3bab6342
AT
5843 gcc_assert (perm_mask != NULL_TREE);
5844 }
5845 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5846 {
3bab6342
AT
5847 modifier = NARROW;
5848
e3342de4 5849 vec_perm_builder sel (nunits, nunits, 1);
3bab6342 5850 for (i = 0; i < (unsigned int) nunits; ++i)
908a1a16 5851 sel.quick_push (i | scatter_off_nunits);
3bab6342 5852
e3342de4
RS
5853 vec_perm_indices indices (sel, 2, nunits);
5854 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
5855 gcc_assert (perm_mask != NULL_TREE);
5856 ncopies *= 2;
5857 }
5858 else
5859 gcc_unreachable ();
5860
134c85ca 5861 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
5862 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5863 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5864 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5865 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5866 scaletype = TREE_VALUE (arglist);
5867
5868 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5869 && TREE_CODE (rettype) == VOID_TYPE);
5870
134c85ca 5871 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
5872 if (!is_gimple_min_invariant (ptr))
5873 {
5874 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5875 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5876 gcc_assert (!new_bb);
5877 }
5878
5879 /* Currently we support only unconditional scatter stores,
5880 so mask should be all ones. */
5881 mask = build_int_cst (masktype, -1);
5882 mask = vect_init_vector (stmt, mask, masktype, NULL);
5883
134c85ca 5884 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
5885
5886 prev_stmt_info = NULL;
5887 for (j = 0; j < ncopies; ++j)
5888 {
5889 if (j == 0)
5890 {
5891 src = vec_oprnd1
81c40241 5892 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
3bab6342 5893 op = vec_oprnd0
134c85ca 5894 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
5895 }
5896 else if (modifier != NONE && (j & 1))
5897 {
5898 if (modifier == WIDEN)
5899 {
5900 src = vec_oprnd1
5901 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5902 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5903 stmt, gsi);
5904 }
5905 else if (modifier == NARROW)
5906 {
5907 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5908 stmt, gsi);
5909 op = vec_oprnd0
134c85ca
RS
5910 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5911 vec_oprnd0);
3bab6342
AT
5912 }
5913 else
5914 gcc_unreachable ();
5915 }
5916 else
5917 {
5918 src = vec_oprnd1
5919 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5920 op = vec_oprnd0
134c85ca
RS
5921 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5922 vec_oprnd0);
3bab6342
AT
5923 }
5924
5925 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5926 {
5927 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5928 == TYPE_VECTOR_SUBPARTS (srctype));
0e22bb5a 5929 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
5930 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5931 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5933 src = var;
5934 }
5935
5936 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5937 {
5938 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5939 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 5940 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
5941 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5942 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5943 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5944 op = var;
5945 }
5946
5947 new_stmt
134c85ca 5948 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
5949
5950 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5951
5952 if (prev_stmt_info == NULL)
5953 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5954 else
5955 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5956 prev_stmt_info = vinfo_for_stmt (new_stmt);
5957 }
5958 return true;
5959 }
5960
2de001ee 5961 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
0d0293ac 5962 if (grouped_store)
ebfd146a 5963 {
2de001ee 5964 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 5965 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5966 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5967
e14c1050 5968 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5969
5970 /* FORNOW */
a70d6342 5971 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5972
5973 /* We vectorize all the stmts of the interleaving group when we
5974 reach the last stmt in the group. */
e14c1050
IR
5975 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5976 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5977 && !slp)
5978 {
5979 *vec_stmt = NULL;
5980 return true;
5981 }
5982
5983 if (slp)
4b5caab7 5984 {
0d0293ac 5985 grouped_store = false;
4b5caab7
IR
5986 /* VEC_NUM is the number of vect stmts to be created for this
5987 group. */
5988 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5989 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 5990 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 5991 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5992 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5993 }
ebfd146a 5994 else
4b5caab7
IR
5995 /* VEC_NUM is the number of vect stmts to be created for this
5996 group. */
ebfd146a 5997 vec_num = group_size;
44fc7854
BE
5998
5999 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6000 }
b8698a0f 6001 else
ebfd146a
IR
6002 {
6003 first_stmt = stmt;
6004 first_dr = dr;
6005 group_size = vec_num = 1;
44fc7854 6006 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a 6007 }
b8698a0f 6008
73fbfcad 6009 if (dump_enabled_p ())
78c60e3d 6010 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6011 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6012
2de001ee
RS
6013 if (memory_access_type == VMAT_ELEMENTWISE
6014 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6015 {
6016 gimple_stmt_iterator incr_gsi;
6017 bool insert_after;
355fe088 6018 gimple *incr;
f2e2a985
MM
6019 tree offvar;
6020 tree ivstep;
6021 tree running_off;
6022 gimple_seq stmts = NULL;
6023 tree stride_base, stride_step, alias_off;
6024 tree vec_oprnd;
f502d50e 6025 unsigned int g;
f2e2a985
MM
6026
6027 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6028
6029 stride_base
6030 = fold_build_pointer_plus
f502d50e 6031 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 6032 size_binop (PLUS_EXPR,
f502d50e 6033 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
44fc7854 6034 convert_to_ptrofftype (DR_INIT (first_dr))));
f502d50e 6035 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
6036
6037 /* For a store with loop-invariant (but other than power-of-2)
6038 stride (i.e. not a grouped access) like so:
6039
6040 for (i = 0; i < n; i += stride)
6041 array[i] = ...;
6042
6043 we generate a new induction variable and new stores from
6044 the components of the (vectorized) rhs:
6045
6046 for (j = 0; ; j += VF*stride)
6047 vectemp = ...;
6048 tmp1 = vectemp[0];
6049 array[j] = tmp1;
6050 tmp2 = vectemp[1];
6051 array[j + stride] = tmp2;
6052 ...
6053 */
6054
cee62fee 6055 unsigned nstores = nunits;
b17dc4d4 6056 unsigned lnel = 1;
cee62fee 6057 tree ltype = elem_type;
04199738 6058 tree lvectype = vectype;
cee62fee
MM
6059 if (slp)
6060 {
b17dc4d4
RB
6061 if (group_size < nunits
6062 && nunits % group_size == 0)
6063 {
6064 nstores = nunits / group_size;
6065 lnel = group_size;
6066 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6067 lvectype = vectype;
6068
 6069 /* First check whether the vec_extract optab supports extraction of
 6070 vector elts directly; if not, fall back to an integer-mode pun. */
b397965c 6071 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6072 machine_mode vmode;
6073 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6074 || !VECTOR_MODE_P (vmode)
04199738
RB
6075 || (convert_optab_handler (vec_extract_optab,
6076 TYPE_MODE (vectype), vmode)
6077 == CODE_FOR_nothing))
6078 {
6079 /* Try to avoid emitting an extract of vector elements
6080 by performing the extracts using an integer type of the
6081 same size, extracting from a vector of those and then
6082 re-interpreting it as the original vector type if
6083 supported. */
6084 unsigned lsize
6085 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6086 elmode = int_mode_for_size (lsize, 0).require ();
04199738
RB
6087 /* If we can't construct such a vector fall back to
6088 element extracts from the original vector type and
6089 element size stores. */
9da15d40
RS
6090 if (mode_for_vector (elmode,
6091 nunits / group_size).exists (&vmode)
6092 && VECTOR_MODE_P (vmode)
04199738
RB
6093 && (convert_optab_handler (vec_extract_optab,
6094 vmode, elmode)
6095 != CODE_FOR_nothing))
6096 {
6097 nstores = nunits / group_size;
6098 lnel = group_size;
6099 ltype = build_nonstandard_integer_type (lsize, 1);
6100 lvectype = build_vector_type (ltype, nstores);
6101 }
6102 /* Else fall back to vector extraction anyway.
6103 Fewer stores are more important than avoiding spilling
6104 of the vector we extract from. Compared to the
6105 construction case in vectorizable_load no store-forwarding
6106 issue exists here for reasonable archs. */
6107 }
b17dc4d4
RB
6108 }
6109 else if (group_size >= nunits
6110 && group_size % nunits == 0)
6111 {
6112 nstores = 1;
6113 lnel = nunits;
6114 ltype = vectype;
04199738 6115 lvectype = vectype;
b17dc4d4 6116 }
cee62fee
MM
6117 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6118 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6119 }
6120
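 /* E.g. (illustrative): for an SLP group of size 2 stored from V4SI,
 nstores becomes 2; if extracting a V2SI sub-vector is not directly
 supported, the vector is punned to V2DI so each pair of elements
 is extracted and stored as a single 64-bit integer. */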
f2e2a985
MM
6121 ivstep = stride_step;
6122 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6123 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6124
6125 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6126
6127 create_iv (stride_base, ivstep, NULL,
6128 loop, &incr_gsi, insert_after,
6129 &offvar, NULL);
6130 incr = gsi_stmt (incr_gsi);
310213d4 6131 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
6132
6133 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6134 if (stmts)
6135 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6136
6137 prev_stmt_info = NULL;
44fc7854 6138 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6139 next_stmt = first_stmt;
6140 for (g = 0; g < group_size; g++)
f2e2a985 6141 {
f502d50e
MM
6142 running_off = offvar;
6143 if (g)
f2e2a985 6144 {
f502d50e
MM
6145 tree size = TYPE_SIZE_UNIT (ltype);
6146 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6147 size);
f502d50e 6148 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6149 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6150 running_off, pos);
f2e2a985 6151 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6152 running_off = newoff;
f502d50e 6153 }
b17dc4d4
RB
6154 unsigned int group_el = 0;
6155 unsigned HOST_WIDE_INT
6156 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6157 for (j = 0; j < ncopies; j++)
6158 {
6159 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6160 and first_stmt == stmt. */
6161 if (j == 0)
6162 {
6163 if (slp)
6164 {
6165 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6166 slp_node);
f502d50e
MM
6167 vec_oprnd = vec_oprnds[0];
6168 }
6169 else
6170 {
6171 gcc_assert (gimple_assign_single_p (next_stmt));
6172 op = gimple_assign_rhs1 (next_stmt);
81c40241 6173 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6174 }
6175 }
f2e2a985 6176 else
f502d50e
MM
6177 {
6178 if (slp)
6179 vec_oprnd = vec_oprnds[j];
6180 else
c079cbac 6181 {
81c40241 6182 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
6183 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6184 }
f502d50e 6185 }
04199738
RB
6186 /* Pun the vector to extract from if necessary. */
6187 if (lvectype != vectype)
6188 {
6189 tree tem = make_ssa_name (lvectype);
6190 gimple *pun
6191 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6192 lvectype, vec_oprnd));
6193 vect_finish_stmt_generation (stmt, pun, gsi);
6194 vec_oprnd = tem;
6195 }
f502d50e
MM
6196 for (i = 0; i < nstores; i++)
6197 {
6198 tree newref, newoff;
355fe088 6199 gimple *incr, *assign;
f502d50e
MM
6200 tree size = TYPE_SIZE (ltype);
6201 /* Extract the i'th component. */
6202 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6203 bitsize_int (i), size);
6204 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6205 size, pos);
6206
6207 elem = force_gimple_operand_gsi (gsi, elem, true,
6208 NULL_TREE, true,
6209 GSI_SAME_STMT);
6210
b17dc4d4
RB
6211 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6212 group_el * elsz);
f502d50e 6213 newref = build2 (MEM_REF, ltype,
b17dc4d4 6214 running_off, this_off);
f502d50e
MM
6215
6216 /* And store it to *running_off. */
6217 assign = gimple_build_assign (newref, elem);
6218 vect_finish_stmt_generation (stmt, assign, gsi);
6219
b17dc4d4
RB
6220 group_el += lnel;
6221 if (! slp
6222 || group_el == group_size)
6223 {
6224 newoff = copy_ssa_name (running_off, NULL);
6225 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6226 running_off, stride_step);
6227 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6228
b17dc4d4
RB
6229 running_off = newoff;
6230 group_el = 0;
6231 }
225ce44b
RB
6232 if (g == group_size - 1
6233 && !slp)
f502d50e
MM
6234 {
6235 if (j == 0 && i == 0)
225ce44b
RB
6236 STMT_VINFO_VEC_STMT (stmt_info)
6237 = *vec_stmt = assign;
f502d50e
MM
6238 else
6239 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6240 prev_stmt_info = vinfo_for_stmt (assign);
6241 }
6242 }
f2e2a985 6243 }
f502d50e 6244 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6245 if (slp)
6246 break;
f2e2a985 6247 }
778dd3b6
RB
6248
6249 vec_oprnds.release ();
f2e2a985
MM
6250 return true;
6251 }
6252
8c681247 6253 auto_vec<tree> dr_chain (group_size);
9771b263 6254 oprnds.create (group_size);
ebfd146a 6255
720f5239 6256 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6257 gcc_assert (alignment_support_scheme);
272c6793
RS
6258 /* Targets with store-lane instructions must not require explicit
6259 realignment. */
2de001ee 6260 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
6261 || alignment_support_scheme == dr_aligned
6262 || alignment_support_scheme == dr_unaligned_supported);
6263
62da9e14
RS
6264 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6265 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6266 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6267
2de001ee 6268 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
6269 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6270 else
6271 aggr_type = vectype;
ebfd146a
IR
6272
6273 /* In case the vectorization factor (VF) is bigger than the number
6274 of elements that we can fit in a vectype (nunits), we have to generate
 6275 more than one vector stmt, i.e. we need to "unroll" the
b8698a0f 6276 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6277 vect_get_vec_def_for_copy_stmt. */
6278
0d0293ac 6279 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6280
6281 S1: &base + 2 = x2
6282 S2: &base = x0
6283 S3: &base + 1 = x1
6284 S4: &base + 3 = x3
6285
 6286 We create vectorized stores starting from the base address (the access
 6287 of the first stmt in the chain - S2 in the above example) when the last
 6288 store stmt of the chain (S4) is reached:
6289
6290 VS1: &base = vx2
6291 VS2: &base + vec_size*1 = vx0
6292 VS3: &base + vec_size*2 = vx1
6293 VS4: &base + vec_size*3 = vx3
6294
6295 Then permutation statements are generated:
6296
3fcc1b55
JJ
6297 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6298 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6299 ...
b8698a0f 6300
ebfd146a
IR
6301 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6302 (the order of the data-refs in the output of vect_permute_store_chain
6303 corresponds to the order of scalar stmts in the interleaving chain - see
6304 the documentation of vect_permute_store_chain()).
6305
6306 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6307 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6308 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6309 STMT_VINFO_RELATED_STMT for the next copies.
6310 */
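   /* Editorial illustration (not from the original sources): with
      8-element vectors, a group of two interleaved scalar stores such as

	for (i = 0; i < n; i++)
	  {
	    a[2*i] = x[i];
	    a[2*i + 1] = y[i];
	  }

      is vectorized with exactly the masks shown above:
      {0, 8, 1, 9, 2, 10, 3, 11} interleaves the low halves of the two
      input vectors and {4, 12, 5, 13, 6, 14, 7, 15} the high halves.  */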
6311
6312 prev_stmt_info = NULL;
6313 for (j = 0; j < ncopies; j++)
6314 {
6315
6316 if (j == 0)
6317 {
6318 if (slp)
6319 {
6320 /* Get vectorized arguments for SLP_NODE. */
d092494c 6321 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6322 NULL, slp_node);
ebfd146a 6323
9771b263 6324 vec_oprnd = vec_oprnds[0];
6325 }
6326 else
6327 {
6328 /* For interleaved stores we collect vectorized defs for all the
6329 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6330 used as an input to vect_permute_store_chain(), and OPRNDS as
6331 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6332
0d0293ac 6333 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6334 OPRNDS are of size 1. */
b8698a0f 6335 next_stmt = first_stmt;
6336 for (i = 0; i < group_size; i++)
6337 {
6338 /* Since gaps are not supported for interleaved stores,
6339 GROUP_SIZE is the exact number of stmts in the chain.
6340 Therefore, NEXT_STMT can't be NULL. In case that
6341 there is no interleaving, GROUP_SIZE is 1, and only one
6342 iteration of the loop will be executed. */
6343 gcc_assert (next_stmt
6344 && gimple_assign_single_p (next_stmt));
6345 op = gimple_assign_rhs1 (next_stmt);
6346
81c40241 6347 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6348 dr_chain.quick_push (vec_oprnd);
6349 oprnds.quick_push (vec_oprnd);
e14c1050 6350 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6351 }
6352 }
6353
6354 /* We should have caught mismatched types earlier. */
6355 gcc_assert (useless_type_conversion_p (vectype,
6356 TREE_TYPE (vec_oprnd)));
6357 bool simd_lane_access_p
6358 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6359 if (simd_lane_access_p
6360 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6361 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6362 && integer_zerop (DR_OFFSET (first_dr))
6363 && integer_zerop (DR_INIT (first_dr))
6364 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6365 get_alias_set (TREE_TYPE (ref_type))))
6366 {
6367 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6368 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6369 inv_p = false;
6370 }
6371 else
6372 dataref_ptr
6373 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6374 simd_lane_access_p ? loop : NULL,
09dfa495 6375 offset, &dummy, gsi, &ptr_incr,
74bf76ed 6376 simd_lane_access_p, &inv_p);
a70d6342 6377 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6378 }
b8698a0f 6379 else
ebfd146a 6380 {
6381 /* For interleaved stores we created vectorized defs for all the
6382 defs stored in OPRNDS in the previous iteration (previous copy).
6383 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6384 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6385 next copy.
0d0293ac 6386 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6387 OPRNDS are of size 1. */
6388 for (i = 0; i < group_size; i++)
6389 {
9771b263 6390 op = oprnds[i];
81c40241 6391 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 6392 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6393 dr_chain[i] = vec_oprnd;
6394 oprnds[i] = vec_oprnd;
ebfd146a 6395 }
6396 if (dataref_offset)
6397 dataref_offset
6398 = int_const_binop (PLUS_EXPR, dataref_offset,
6399 TYPE_SIZE_UNIT (aggr_type));
6400 else
6401 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6402 TYPE_SIZE_UNIT (aggr_type));
6403 }
6404
2de001ee 6405 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6406 {
272c6793 6407 tree vec_array;
267d3070 6408
6409 /* Combine all the vectors into an array. */
6410 vec_array = create_vector_array (vectype, vec_num);
6411 for (i = 0; i < vec_num; i++)
c2d7ab2a 6412 {
9771b263 6413 vec_oprnd = dr_chain[i];
272c6793 6414 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6415 }
b8698a0f 6416
6417 /* Emit:
6418 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
44fc7854 6419 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6420 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6421 vec_array);
6422 gimple_call_set_lhs (call, data_ref);
6423 gimple_call_set_nothrow (call, true);
6424 new_stmt = call;
267d3070 6425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6426 }
6427 else
6428 {
6429 new_stmt = NULL;
0d0293ac 6430 if (grouped_store)
272c6793 6431 {
6432 if (j == 0)
6433 result_chain.create (group_size);
6434 /* Permute. */
6435 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6436 &result_chain);
6437 }
c2d7ab2a 6438
6439 next_stmt = first_stmt;
6440 for (i = 0; i < vec_num; i++)
6441 {
644ffefd 6442 unsigned align, misalign;
6443
6444 if (i > 0)
6445 /* Bump the vector pointer. */
6446 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6447 stmt, NULL_TREE);
6448
6449 if (slp)
9771b263 6450 vec_oprnd = vec_oprnds[i];
6451 else if (grouped_store)
6452 /* For grouped stores vectorized defs are interleaved in
272c6793 6453 vect_permute_store_chain(). */
9771b263 6454 vec_oprnd = result_chain[i];
272c6793 6455
69a2e8a1 6456 data_ref = fold_build2 (MEM_REF, vectype,
6457 dataref_ptr,
6458 dataref_offset
6459 ? dataref_offset
44fc7854 6460 : build_int_cst (ref_type, 0));
f702e7d4 6461 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 6462 if (aligned_access_p (first_dr))
644ffefd 6463 misalign = 0;
6464 else if (DR_MISALIGNMENT (first_dr) == -1)
6465 {
25f68d90 6466 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 6467 misalign = 0;
6468 TREE_TYPE (data_ref)
6469 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 6470 align * BITS_PER_UNIT);
6471 }
6472 else
6473 {
6474 TREE_TYPE (data_ref)
6475 = build_aligned_type (TREE_TYPE (data_ref),
6476 TYPE_ALIGN (elem_type));
644ffefd 6477 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6478 }
6479 if (dataref_offset == NULL_TREE
6480 && TREE_CODE (dataref_ptr) == SSA_NAME)
6481 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6482 misalign);
c2d7ab2a 6483
62da9e14 6484 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6485 {
6486 tree perm_mask = perm_mask_for_reverse (vectype);
6487 tree perm_dest
6488 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6489 vectype);
b731b390 6490 tree new_temp = make_ssa_name (perm_dest);
6491
6492 /* Generate the permute statement. */
355fe088 6493 gimple *perm_stmt
6494 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6495 vec_oprnd, perm_mask);
6496 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6497
6498 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6499 vec_oprnd = new_temp;
6500 }
6501
6502 /* Arguments are ready. Create the new vector stmt. */
6503 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6504 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6505
6506 if (slp)
6507 continue;
6508
e14c1050 6509 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6510 if (!next_stmt)
6511 break;
6512 }
ebfd146a 6513 }
6514 if (!slp)
6515 {
6516 if (j == 0)
6517 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6518 else
6519 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6520 prev_stmt_info = vinfo_for_stmt (new_stmt);
6521 }
6522 }
6523
6524 oprnds.release ();
6525 result_chain.release ();
6526 vec_oprnds.release ();
6527
6528 return true;
6529}
6530
6531/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6532 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 6533 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 6534 vect_gen_perm_mask_checked. */
a1e53f3f 6535
3fcc1b55 6536tree
4aae3cb3 6537vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 6538{
5ebaa477 6539 tree mask_elt_type, mask_type;
a1e53f3f 6540
6541 unsigned int nunits = sel.length ();
6542 gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
22e4dee7 6543
96f9265a 6544 mask_elt_type = lang_hooks.types.type_for_mode
304b9962 6545 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
22e4dee7 6546 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 6547
5ebaa477 6548 tree_vector_builder mask_elts (mask_type, nunits, 1);
908a1a16 6549 for (unsigned int i = 0; i < nunits; ++i)
794e3180 6550 mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i]));
5ebaa477 6551 return mask_elts.build ();
6552}
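/* Usage sketch (editorial; it mirrors the callers later in this file):
   to build a mask that reverses the NUNITS elements of VECTYPE:

     vec_perm_builder sel (nunits, nunits, 1);
     for (i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   where the _checked variant additionally asserts that the target
   supports the permutation.  */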
6553
7ac7e286 6554/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 6555 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6556
6557tree
4aae3cb3 6558vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 6559{
7ac7e286 6560 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6561 return vect_gen_perm_mask_any (vectype, sel);
6562}
6563
6564/* Given vector variables X and Y that were generated for the scalar
6565 STMT, generate instructions to permute the vector elements of X and Y
6566 using permutation mask MASK_VEC, insert them at *GSI and return the
6567 permuted vector variable. */
6568
6569static tree
355fe088 6570permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6571 gimple_stmt_iterator *gsi)
6572{
6573 tree vectype = TREE_TYPE (x);
aec7ae7d 6574 tree perm_dest, data_ref;
355fe088 6575 gimple *perm_stmt;
a1e53f3f 6576
acdcd61b 6577 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 6578 data_ref = make_ssa_name (perm_dest);
6579
6580 /* Generate the permute statement. */
0d0e4a03 6581 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6582 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6583
6584 return data_ref;
6585}
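/* For example (editorial note): with X = {x0, x1, x2, x3},
   Y = {y0, y1, y2, y3} and MASK_VEC = {0, 4, 1, 5}, the emitted
   VEC_PERM_EXPR yields {x0, y0, x1, y1}; indices 0..3 select from X,
   indices 4..7 from Y.  */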
6586
6587/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6588 inserting them on the loop's preheader edge. Returns true if we
6589 were successful in doing so (and thus STMT can then be moved),
6590 otherwise returns false. */
6591
6592static bool
355fe088 6593hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6594{
6595 ssa_op_iter i;
6596 tree op;
6597 bool any = false;
6598
6599 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6600 {
355fe088 6601 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6602 if (!gimple_nop_p (def_stmt)
6603 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6604 {
6605 /* Make sure we don't need to recurse. While we could do
6606 so in simple cases when there are more complex use webs
6607 we don't have an easy way to preserve stmt order to fulfil
6608 dependencies within them. */
6609 tree op2;
6610 ssa_op_iter i2;
6611 if (gimple_code (def_stmt) == GIMPLE_PHI)
6612 return false;
6613 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6614 {
355fe088 6615 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6616 if (!gimple_nop_p (def_stmt2)
6617 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6618 return false;
6619 }
6620 any = true;
6621 }
6622 }
6623
6624 if (!any)
6625 return true;
6626
6627 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6628 {
355fe088 6629 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6630 if (!gimple_nop_p (def_stmt)
6631 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6632 {
6633 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6634 gsi_remove (&gsi, false);
6635 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6636 }
6637 }
6638
6639 return true;
6640}
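/* Editorial sketch of the intent (not from the original sources): given
   a loop-invariant load such as

     loop:
       p_1 = &a[4];
       x_2 = *p_1;

   hoist_defs_of_uses moves the definition of p_1 to the loop preheader
   so that the caller can hoist the load itself; PHI defs and deeper use
   webs are rejected because statement order would have to be preserved
   when moving them.  */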
6641
6642/* vectorizable_load.
6643
6644 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6645 can be vectorized.
6646 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6647 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6648 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6649
6650static bool
355fe088 6651vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6652 slp_tree slp_node, slp_instance slp_node_instance)
6653{
6654 tree scalar_dest;
6655 tree vec_dest = NULL;
6656 tree data_ref = NULL;
6657 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6658 stmt_vec_info prev_stmt_info;
ebfd146a 6659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6660 struct loop *loop = NULL;
ebfd146a 6661 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6662 bool nested_in_vect_loop = false;
c716e67f 6663 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6664 tree elem_type;
ebfd146a 6665 tree new_temp;
ef4bddc2 6666 machine_mode mode;
355fe088 6667 gimple *new_stmt = NULL;
6668 tree dummy;
6669 enum dr_alignment_support alignment_support_scheme;
6670 tree dataref_ptr = NULL_TREE;
74bf76ed 6671 tree dataref_offset = NULL_TREE;
355fe088 6672 gimple *ptr_incr = NULL;
ebfd146a 6673 int ncopies;
44fc7854 6674 int i, j, group_size, group_gap_adj;
ebfd146a
IR
6675 tree msq = NULL_TREE, lsq;
6676 tree offset = NULL_TREE;
356bbc4c 6677 tree byte_offset = NULL_TREE;
ebfd146a 6678 tree realignment_token = NULL_TREE;
538dd0b7 6679 gphi *phi = NULL;
6e1aa848 6680 vec<tree> dr_chain = vNULL;
0d0293ac 6681 bool grouped_load = false;
355fe088 6682 gimple *first_stmt;
4f0a0218 6683 gimple *first_stmt_for_drptr = NULL;
6684 bool inv_p;
6685 bool compute_in_loop = false;
6686 struct loop *at_loop;
6687 int vec_num;
6688 bool slp = (slp_node != NULL);
6689 bool slp_perm = false;
6690 enum tree_code code;
6691 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6692 int vf;
272c6793 6693 tree aggr_type;
134c85ca 6694 gather_scatter_info gs_info;
310213d4 6695 vec_info *vinfo = stmt_info->vinfo;
44fc7854 6696 tree ref_type;
a70d6342 6697
6698 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6699 return false;
6700
6701 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6702 && ! vec_stmt)
6703 return false;
6704
6705 /* Is vectorizable load? */
6706 if (!is_gimple_assign (stmt))
6707 return false;
6708
6709 scalar_dest = gimple_assign_lhs (stmt);
6710 if (TREE_CODE (scalar_dest) != SSA_NAME)
6711 return false;
6712
6713 code = gimple_assign_rhs_code (stmt);
6714 if (code != ARRAY_REF
6715 && code != BIT_FIELD_REF
6716 && code != INDIRECT_REF
6717 && code != COMPONENT_REF
6718 && code != IMAGPART_EXPR
6719 && code != REALPART_EXPR
6720 && code != MEM_REF
6721 && TREE_CODE_CLASS (code) != tcc_declaration)
6722 return false;
6723
6724 if (!STMT_VINFO_DATA_REF (stmt_info))
6725 return false;
6726
6727 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6728 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6729
6730 if (loop_vinfo)
6731 {
6732 loop = LOOP_VINFO_LOOP (loop_vinfo);
6733 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6734 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6735 }
6736 else
3533e503 6737 vf = 1;
6738
6739 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6740 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6741 case of SLP. */
fce57248 6742 if (slp)
6743 ncopies = 1;
6744 else
e8f142e2 6745 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6746
6747 gcc_assert (ncopies >= 1);
6748
6749 /* FORNOW. This restriction should be relaxed. */
6750 if (nested_in_vect_loop && ncopies > 1)
6751 {
73fbfcad 6752 if (dump_enabled_p ())
78c60e3d 6753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6754 "multiple types in nested loop.\n");
6755 return false;
6756 }
6757
6758 /* Invalidate assumptions made by dependence analysis when vectorization
6759 on the unrolled body effectively re-orders stmts. */
6760 if (ncopies > 1
6761 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6762 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6763 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6764 {
6765 if (dump_enabled_p ())
6766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6767 "cannot perform implicit CSE when unrolling "
6768 "with negative dependence distance\n");
6769 return false;
6770 }
6771
7b7b1813 6772 elem_type = TREE_TYPE (vectype);
947131ba 6773 mode = TYPE_MODE (vectype);
6774
6775 /* FORNOW. In some cases can vectorize even if data-type not supported
6776 (e.g. - data copies). */
947131ba 6777 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6778 {
73fbfcad 6779 if (dump_enabled_p ())
78c60e3d 6780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6781 "Aligned load, but unsupported type.\n");
6782 return false;
6783 }
6784
ebfd146a 6785 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6786 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6787 {
0d0293ac 6788 grouped_load = true;
ebfd146a 6789 /* FORNOW */
6790 gcc_assert (!nested_in_vect_loop);
6791 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6792
e14c1050 6793 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 6794 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 6795
6796 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6797 slp_perm = true;
6798
6799 /* Invalidate assumptions made by dependence analysis when vectorization
6800 on the unrolled body effectively re-orders stmts. */
6801 if (!PURE_SLP_STMT (stmt_info)
6802 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6803 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6804 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6805 {
6806 if (dump_enabled_p ())
6807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6808 "cannot perform implicit CSE when performing "
6809 "group loads with negative dependence distance\n");
6810 return false;
6811 }
6812
6813 /* Similarly when the stmt is a load that is both part of a SLP
6814 instance and a loop vectorized stmt via the same-dr mechanism
6815 we have to give up. */
6816 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6817 && (STMT_SLP_TYPE (stmt_info)
6818 != STMT_SLP_TYPE (vinfo_for_stmt
6819 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6820 {
6821 if (dump_enabled_p ())
6822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6823 "conflicting SLP types for CSEd load\n");
6824 return false;
6825 }
6826 }
6827
2de001ee 6828 vect_memory_access_type memory_access_type;
62da9e14 6829 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6830 &memory_access_type, &gs_info))
6831 return false;
a1e53f3f 6832
6833 if (!vec_stmt) /* transformation not required. */
6834 {
6835 if (!slp)
6836 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 6837 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6838 /* The SLP costs are calculated during SLP analysis. */
6839 if (!PURE_SLP_STMT (stmt_info))
2de001ee 6840 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 6841 NULL, NULL, NULL);
6842 return true;
6843 }
6844
6845 if (!slp)
6846 gcc_assert (memory_access_type
6847 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6848
73fbfcad 6849 if (dump_enabled_p ())
78c60e3d 6850 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6851 "transform load. ncopies = %d\n", ncopies);
ebfd146a 6852
67b8dbac 6853 /* Transform. */
ebfd146a 6854
f702e7d4 6855 ensure_base_align (dr);
c716e67f 6856
2de001ee 6857 if (memory_access_type == VMAT_GATHER_SCATTER)
6858 {
6859 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 6860 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
aec7ae7d 6861 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6862 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6863 edge pe = loop_preheader_edge (loop);
6864 gimple_seq seq;
6865 basic_block new_bb;
6866 enum { NARROW, NONE, WIDEN } modifier;
134c85ca 6867 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6868
6869 if (nunits == gather_off_nunits)
6870 modifier = NONE;
6871 else if (nunits == gather_off_nunits / 2)
6872 {
6873 modifier = WIDEN;
6874
e3342de4 6875 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
aec7ae7d 6876 for (i = 0; i < gather_off_nunits; ++i)
908a1a16 6877 sel.quick_push (i | nunits);
aec7ae7d 6878
6879 vec_perm_indices indices (sel, 1, gather_off_nunits);
6880 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6881 indices);
6882 }
6883 else if (nunits == gather_off_nunits * 2)
6884 {
6885 modifier = NARROW;
6886
e3342de4 6887 vec_perm_builder sel (nunits, nunits, 1);
aec7ae7d 6888 for (i = 0; i < nunits; ++i)
6889 sel.quick_push (i < gather_off_nunits
6890 ? i : i + nunits - gather_off_nunits);
aec7ae7d 6891
6892 vec_perm_indices indices (sel, 2, nunits);
6893 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6894 ncopies *= 2;
6895 }
6896 else
6897 gcc_unreachable ();
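	  /* Editorial example (illustrative only): if the data vector has
	     4 elements and the offset vector 8, MODIFIER is WIDEN and the
	     mask built above is {4, 5, 6, 7, 4, 5, 6, 7}, which extracts
	     the high half of the offsets for every odd copy.  In the
	     opposite 8-versus-4 NARROW case the mask is
	     {0, 1, 2, 3, 8, 9, 10, 11}, concatenating the low halves of
	     two adjacent gather results.  */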
6898
134c85ca 6899 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6900 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6901 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6902 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6903 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6904 scaletype = TREE_VALUE (arglist);
d3c2fee0 6905 gcc_checking_assert (types_compatible_p (srctype, rettype));
6906
6907 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6908
134c85ca 6909 ptr = fold_convert (ptrtype, gs_info.base);
6910 if (!is_gimple_min_invariant (ptr))
6911 {
6912 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6913 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6914 gcc_assert (!new_bb);
6915 }
6916
6917 /* Currently we support only unconditional gather loads,
6918 so mask should be all ones. */
6919 if (TREE_CODE (masktype) == INTEGER_TYPE)
6920 mask = build_int_cst (masktype, -1);
6921 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6922 {
6923 mask = build_int_cst (TREE_TYPE (masktype), -1);
6924 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6925 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6926 }
6927 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6928 {
6929 REAL_VALUE_TYPE r;
6930 long tmp[6];
6931 for (j = 0; j < 6; ++j)
6932 tmp[j] = -1;
6933 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6934 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6935 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6936 mask = vect_init_vector (stmt, mask, masktype, NULL);
6937 }
6938 else
6939 gcc_unreachable ();
aec7ae7d 6940
134c85ca 6941 scale = build_int_cst (scaletype, gs_info.scale);
aec7ae7d 6942
6943 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6944 merge = build_int_cst (TREE_TYPE (rettype), 0);
6945 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6946 {
6947 REAL_VALUE_TYPE r;
6948 long tmp[6];
6949 for (j = 0; j < 6; ++j)
6950 tmp[j] = 0;
6951 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6952 merge = build_real (TREE_TYPE (rettype), r);
6953 }
6954 else
6955 gcc_unreachable ();
6956 merge = build_vector_from_val (rettype, merge);
6957 merge = vect_init_vector (stmt, merge, rettype, NULL);
6958
6959 prev_stmt_info = NULL;
6960 for (j = 0; j < ncopies; ++j)
6961 {
6962 if (modifier == WIDEN && (j & 1))
6963 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6964 perm_mask, stmt, gsi);
6965 else if (j == 0)
6966 op = vec_oprnd0
134c85ca 6967 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6968 else
6969 op = vec_oprnd0
134c85ca 6970 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6971
6972 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6973 {
6974 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6975 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 6976 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6977 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6978 new_stmt
0d0e4a03 6979 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6980 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6981 op = var;
6982 }
6983
6984 new_stmt
134c85ca 6985 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6986
6987 if (!useless_type_conversion_p (vectype, rettype))
6988 {
6989 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6990 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 6991 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6992 gimple_call_set_lhs (new_stmt, op);
6993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 6994 var = make_ssa_name (vec_dest);
6995 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6996 new_stmt
0d0e4a03 6997 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6998 }
6999 else
7000 {
7001 var = make_ssa_name (vec_dest, new_stmt);
7002 gimple_call_set_lhs (new_stmt, var);
7003 }
7004
7005 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7006
7007 if (modifier == NARROW)
7008 {
7009 if ((j & 1) == 0)
7010 {
7011 prev_res = var;
7012 continue;
7013 }
7014 var = permute_vec_elements (prev_res, var,
7015 perm_mask, stmt, gsi);
7016 new_stmt = SSA_NAME_DEF_STMT (var);
7017 }
7018
7019 if (prev_stmt_info == NULL)
7020 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7021 else
7022 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7023 prev_stmt_info = vinfo_for_stmt (new_stmt);
7024 }
7025 return true;
7026 }
7027
7028 if (memory_access_type == VMAT_ELEMENTWISE
7029 || memory_access_type == VMAT_STRIDED_SLP)
7030 {
7031 gimple_stmt_iterator incr_gsi;
7032 bool insert_after;
355fe088 7033 gimple *incr;
7d75abc8 7034 tree offvar;
7035 tree ivstep;
7036 tree running_off;
9771b263 7037 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 7038 gimple_seq stmts = NULL;
7039 tree stride_base, stride_step, alias_off;
7040
7041 gcc_assert (!nested_in_vect_loop);
7d75abc8 7042
f502d50e 7043 if (slp && grouped_load)
7044 {
7045 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7046 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7047 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7048 ref_type = get_group_alias_ptr_type (first_stmt);
7049 }
ab313a8c 7050 else
7051 {
7052 first_stmt = stmt;
7053 first_dr = dr;
7054 group_size = 1;
7055 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7056 }
ab313a8c 7057
7058 stride_base
7059 = fold_build_pointer_plus
ab313a8c 7060 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7061 size_binop (PLUS_EXPR,
7062 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7063 convert_to_ptrofftype (DR_INIT (first_dr))));
7064 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7065
7066 /* For a load with loop-invariant (but other than power-of-2)
7067 stride (i.e. not a grouped access) like so:
7068
7069 for (i = 0; i < n; i += stride)
7070 ... = array[i];
7071
7072 we generate a new induction variable and new accesses to
7073 form a new vector (or vectors, depending on ncopies):
7074
7075 for (j = 0; ; j += VF*stride)
7076 tmp1 = array[j];
7077 tmp2 = array[j + stride];
7078 ...
7079 vectemp = {tmp1, tmp2, ...}
7080 */
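      /* Editorial example (illustrative only): with nunits == 4, VF == 4
	 and a scalar stride of 3, each copy emits the element loads
	 array[j], array[j + 3], array[j + 6] and array[j + 9], and the
	 induction variable advances by VF*3 = 12 elements per vector
	 iteration.  */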
7081
7082 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7083 build_int_cst (TREE_TYPE (stride_step), vf));
7084
7085 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7086
ab313a8c 7087 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7088 loop, &incr_gsi, insert_after,
7089 &offvar, NULL);
7090 incr = gsi_stmt (incr_gsi);
310213d4 7091 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7092
7093 stride_step = force_gimple_operand (unshare_expr (stride_step),
7094 &stmts, true, NULL_TREE);
7095 if (stmts)
7096 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7097
7098 prev_stmt_info = NULL;
7099 running_off = offvar;
44fc7854 7100 alias_off = build_int_cst (ref_type, 0);
7b5fc413 7101 int nloads = nunits;
e09b4c37 7102 int lnel = 1;
7b5fc413 7103 tree ltype = TREE_TYPE (vectype);
ea60dd34 7104 tree lvectype = vectype;
b266b968 7105 auto_vec<tree> dr_chain;
2de001ee 7106 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7107 {
2de001ee 7108 if (group_size < nunits)
e09b4c37 7109 {
7110 /* First check if vec_init optab supports construction from
7111 vector elts directly. */
b397965c 7112 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7113 machine_mode vmode;
7114 if (mode_for_vector (elmode, group_size).exists (&vmode)
7115 && VECTOR_MODE_P (vmode)
7116 && (convert_optab_handler (vec_init_optab,
7117 TYPE_MODE (vectype), vmode)
7118 != CODE_FOR_nothing))
7119 {
7120 nloads = nunits / group_size;
7121 lnel = group_size;
7122 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7123 }
7124 else
7125 {
7126 /* Otherwise avoid emitting a constructor of vector elements
7127 by performing the loads using an integer type of the same
7128 size, constructing a vector of those and then
7129 re-interpreting it as the original vector type.
7130 This avoids a huge runtime penalty due to the general
7131 inability to perform store forwarding from smaller stores
7132 to a larger load. */
7133 unsigned lsize
7134 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7135 elmode = int_mode_for_size (lsize, 0).require ();
7136 /* If we can't construct such a vector fall back to
7137 element loads of the original vector type. */
7138 if (mode_for_vector (elmode,
7139 nunits / group_size).exists (&vmode)
7140 && VECTOR_MODE_P (vmode)
7141 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7142 != CODE_FOR_nothing))
7143 {
7144 nloads = nunits / group_size;
7145 lnel = group_size;
7146 ltype = build_nonstandard_integer_type (lsize, 1);
7147 lvectype = build_vector_type (ltype, nloads);
7148 }
ea60dd34 7149 }
e09b4c37 7150 }
2de001ee 7151 else
e09b4c37 7152 {
ea60dd34 7153 nloads = 1;
7154 lnel = nunits;
7155 ltype = vectype;
e09b4c37 7156 }
7157 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7158 }
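      /* Editorial example (illustrative only): for V4SI and a group size
	 of 2, the integer fallback above uses lsize == 64, so each
	 iteration performs two 64-bit loads, builds a two-element vector
	 of them and VIEW_CONVERTs it back to V4SI, sidestepping
	 store-forwarding stalls caused by the narrower scalar stores.  */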
7159 if (slp)
7160 {
7161 /* For SLP permutation support we need to load the whole group,
7162 not only the number of vector stmts the permutation result
7163 fits in. */
b266b968 7164 if (slp_perm)
7165 {
7166 ncopies = (group_size * vf + nunits - 1) / nunits;
7167 dr_chain.create (ncopies);
7168 }
7169 else
7170 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7171 }
7172 int group_el = 0;
7173 unsigned HOST_WIDE_INT
7174 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7175 for (j = 0; j < ncopies; j++)
7176 {
7b5fc413 7177 if (nloads > 1)
7178 vec_alloc (v, nloads);
7179 for (i = 0; i < nloads; i++)
7b5fc413 7180 {
7181 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7182 group_el * elsz);
7183 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7184 build2 (MEM_REF, ltype,
7185 running_off, this_off));
7186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7187 if (nloads > 1)
7188 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7189 gimple_assign_lhs (new_stmt));
7190
7191 group_el += lnel;
7192 if (! slp
7193 || group_el == group_size)
7b5fc413 7194 {
e09b4c37
RB
7195 tree newoff = copy_ssa_name (running_off);
7196 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7197 running_off, stride_step);
7198 vect_finish_stmt_generation (stmt, incr, gsi);
7199
7200 running_off = newoff;
e09b4c37 7201 group_el = 0;
7b5fc413 7202 }
7b5fc413 7203 }
e09b4c37 7204 if (nloads > 1)
7d75abc8 7205 {
7206 tree vec_inv = build_constructor (lvectype, v);
7207 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7208 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7209 if (lvectype != vectype)
7210 {
7211 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7212 VIEW_CONVERT_EXPR,
7213 build1 (VIEW_CONVERT_EXPR,
7214 vectype, new_temp));
7215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7216 }
7217 }
7218
7b5fc413 7219 if (slp)
b266b968 7220 {
7221 if (slp_perm)
7222 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7223 else
7224 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7225 }
7d75abc8 7226 else
7227 {
7228 if (j == 0)
7229 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7230 else
7231 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7232 prev_stmt_info = vinfo_for_stmt (new_stmt);
7233 }
7d75abc8 7234 }
b266b968 7235 if (slp_perm)
7236 {
7237 unsigned n_perms;
7238 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7239 slp_node_instance, false, &n_perms);
7240 }
7241 return true;
7242 }
aec7ae7d 7243
0d0293ac 7244 if (grouped_load)
ebfd146a 7245 {
e14c1050 7246 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7247 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7248 /* For SLP vectorization we directly vectorize a subchain
7249 without permutation. */
7250 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7251 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7252 /* For BB vectorization always use the first stmt to base
7253 the data ref pointer on. */
7254 if (bb_vinfo)
7255 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7256
ebfd146a 7257 /* Check if the chain of loads is already vectorized. */
7258 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7259 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7260 ??? But we can only do so if there is exactly one
7261 as we have no way to get at the rest. Leave the CSE
7262 opportunity alone.
7263 ??? With the group load eventually participating
7264 in multiple different permutations (having multiple
7265 slp nodes which refer to the same group) the CSE
7266 is even wrong code. See PR56270. */
7267 && !slp)
7268 {
7269 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7270 return true;
7271 }
7272 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7273 group_gap_adj = 0;
7274
7275 /* VEC_NUM is the number of vect stmts to be created for this group. */
7276 if (slp)
7277 {
0d0293ac 7278 grouped_load = false;
7279 /* For SLP permutation support we need to load the whole group,
7280 not only the number of vector stmts the permutation result
7281 fits in. */
7282 if (slp_perm)
7283 {
7284 vec_num = (group_size * vf + nunits - 1) / nunits;
7285 group_gap_adj = vf * group_size - nunits * vec_num;
7286 }
91ff1504 7287 else
b267968e
RB
7288 {
7289 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7290 group_gap_adj
7291 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7292 }
a70d6342 7293 }
ebfd146a 7294 else
9b999e8c 7295 vec_num = group_size;
7296
7297 ref_type = get_group_alias_ptr_type (first_stmt);
7298 }
7299 else
7300 {
7301 first_stmt = stmt;
7302 first_dr = dr;
7303 group_size = vec_num = 1;
9b999e8c 7304 group_gap_adj = 0;
44fc7854 7305 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7306 }
7307
720f5239 7308 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7309 gcc_assert (alignment_support_scheme);
7310 /* Targets with load-lane instructions must not require explicit
7311 realignment. */
2de001ee 7312 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7313 || alignment_support_scheme == dr_aligned
7314 || alignment_support_scheme == dr_unaligned_supported);
7315
7316 /* In case the vectorization factor (VF) is bigger than the number
7317 of elements that we can fit in a vectype (nunits), we have to generate
7318 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7319 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7320 from one copy of the vector stmt to the next, in the field
ff802fa1 7321 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7322 stages to find the correct vector defs to be used when vectorizing
7323 stmts that use the defs of the current stmt. The example below
7324 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7325 need to create 4 vectorized stmts):
7326
7327 before vectorization:
7328 RELATED_STMT VEC_STMT
7329 S1: x = memref - -
7330 S2: z = x + 1 - -
7331
7332 step 1: vectorize stmt S1:
7333 We first create the vector stmt VS1_0, and, as usual, record a
7334 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7335 Next, we create the vector stmt VS1_1, and record a pointer to
7336 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7337 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7338 stmts and pointers:
7339 RELATED_STMT VEC_STMT
7340 VS1_0: vx0 = memref0 VS1_1 -
7341 VS1_1: vx1 = memref1 VS1_2 -
7342 VS1_2: vx2 = memref2 VS1_3 -
7343 VS1_3: vx3 = memref3 - -
7344 S1: x = load - VS1_0
7345 S2: z = x + 1 - -
7346
7347 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7348 information we recorded in RELATED_STMT field is used to vectorize
7349 stmt S2. */
7350
0d0293ac 7351 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7352
7353 S1: x2 = &base + 2
7354 S2: x0 = &base
7355 S3: x1 = &base + 1
7356 S4: x3 = &base + 3
7357
b8698a0f 7358 Vectorized loads are created in the order of memory accesses
7359 starting from the access of the first stmt of the chain:
7360
7361 VS1: vx0 = &base
7362 VS2: vx1 = &base + vec_size*1
7363 VS3: vx3 = &base + vec_size*2
7364 VS4: vx4 = &base + vec_size*3
7365
7366 Then permutation statements are generated:
7367
7368 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7369 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7370 ...
7371
7372 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7373 (the order of the data-refs in the output of vect_permute_load_chain
7374 corresponds to the order of scalar stmts in the interleaving chain - see
7375 the documentation of vect_permute_load_chain()).
7376 The generation of permutation stmts and recording them in
0d0293ac 7377 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7378
b8698a0f 7379 In case of both multiple types and interleaving, the vector loads and
7380 permutation stmts above are created for every copy. The result vector
7381 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7382 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
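   /* Editorial illustration (not from the original comment): for a group
      of two interleaved loads and 4-element vectors, the masks above are
      concretely {0, 2, 4, 6} (extract even elements) and {1, 3, 5, 7}
      (extract odd elements), applied to the pair <vx0, vx1>.  */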
7383
7384 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7385 on a target that supports unaligned accesses (dr_unaligned_supported)
7386 we generate the following code:
7387 p = initial_addr;
7388 indx = 0;
7389 loop {
7390 p = p + indx * vectype_size;
7391 vec_dest = *(p);
7392 indx = indx + 1;
7393 }
7394
7395 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7396 does not support unaligned accesses (dr_explicit_realign_optimized) -
7397 then generate the following code, in which the data in each iteration is
7398 obtained by two vector loads, one from the previous iteration, and one
7399 from the current iteration:
7400 p1 = initial_addr;
7401 msq_init = *(floor(p1))
7402 p2 = initial_addr + VS - 1;
7403 realignment_token = call target_builtin;
7404 indx = 0;
7405 loop {
7406 p2 = p2 + indx * vectype_size
7407 lsq = *(floor(p2))
7408 vec_dest = realign_load (msq, lsq, realignment_token)
7409 indx = indx + 1;
7410 msq = lsq;
7411 } */
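   /* Editorial sketch of the arithmetic (illustrative only): with
      16-byte vectors and initial_addr = base + 4, floor() masks off the
      low address bits, so msq covers bytes base..base+15 and lsq covers
      bytes base+16..base+31; realign_load then shifts their
      concatenation by the amount encoded in realignment_token to yield
      the 16 bytes starting at base + 4.  */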
7412
7413 /* If the misalignment remains the same throughout the execution of the
7414 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7415 preheader. Otherwise, it needs to be created inside the loop.
7416 This can only occur when vectorizing memory accesses in the inner-loop
7417 nested within an outer-loop that is being vectorized. */
7418
d1e4b493 7419 if (nested_in_vect_loop
832b4117 7420 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7421 {
7422 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7423 compute_in_loop = true;
7424 }
7425
7426 if ((alignment_support_scheme == dr_explicit_realign_optimized
7427 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7428 && !compute_in_loop)
7429 {
7430 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7431 alignment_support_scheme, NULL_TREE,
7432 &at_loop);
7433 if (alignment_support_scheme == dr_explicit_realign_optimized)
7434 {
538dd0b7 7435 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7436 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7437 size_one_node);
7438 }
7439 }
7440 else
7441 at_loop = loop;
7442
62da9e14 7443 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7444 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7445
2de001ee 7446 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7447 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7448 else
7449 aggr_type = vectype;
7450
ebfd146a 7451 prev_stmt_info = NULL;
b267968e 7452 int group_elt = 0;
ebfd146a 7453 for (j = 0; j < ncopies; j++)
b8698a0f 7454 {
272c6793 7455 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7456 if (j == 0)
7457 {
7458 bool simd_lane_access_p
7459 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7460 if (simd_lane_access_p
7461 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7462 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7463 && integer_zerop (DR_OFFSET (first_dr))
7464 && integer_zerop (DR_INIT (first_dr))
7465 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7466 get_alias_set (TREE_TYPE (ref_type)))
7467 && (alignment_support_scheme == dr_aligned
7468 || alignment_support_scheme == dr_unaligned_supported))
7469 {
7470 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7471 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7472 inv_p = false;
74bf76ed 7473 }
7474 else if (first_stmt_for_drptr
7475 && first_stmt != first_stmt_for_drptr)
7476 {
7477 dataref_ptr
7478 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7479 at_loop, offset, &dummy, gsi,
7480 &ptr_incr, simd_lane_access_p,
7481 &inv_p, byte_offset);
7482 /* Adjust the pointer by the difference to first_stmt. */
7483 data_reference_p ptrdr
7484 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7485 tree diff = fold_convert (sizetype,
7486 size_binop (MINUS_EXPR,
7487 DR_INIT (first_dr),
7488 DR_INIT (ptrdr)));
7489 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7490 stmt, diff);
7491 }
7492 else
7493 dataref_ptr
7494 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7495 offset, &dummy, gsi, &ptr_incr,
7496 simd_lane_access_p, &inv_p,
7497 byte_offset);
7498 }
7499 else if (dataref_offset)
7500 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7501 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7502 else
7503 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7504 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7505
0d0293ac 7506 if (grouped_load || slp_perm)
9771b263 7507 dr_chain.create (vec_num);
5ce1ee7f 7508
2de001ee 7509 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7510 {
7511 tree vec_array;
7512
7513 vec_array = create_vector_array (vectype, vec_num);
7514
7515 /* Emit:
7516 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
44fc7854 7517 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7518 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7519 data_ref);
7520 gimple_call_set_lhs (call, vec_array);
7521 gimple_call_set_nothrow (call, true);
7522 new_stmt = call;
272c6793 7523 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7524
7525 /* Extract each vector into an SSA_NAME. */
7526 for (i = 0; i < vec_num; i++)
ebfd146a 7527 {
7528 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7529 vec_array, i);
9771b263 7530 dr_chain.quick_push (new_temp);
7531 }
7532
7533 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7534 vect_record_grouped_load_vectors (stmt, dr_chain);
7535 }
7536 else
7537 {
7538 for (i = 0; i < vec_num; i++)
7539 {
7540 if (i > 0)
7541 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7542 stmt, NULL_TREE);
7543
7544 /* 2. Create the vector-load in the loop. */
7545 switch (alignment_support_scheme)
7546 {
7547 case dr_aligned:
7548 case dr_unaligned_supported:
be1ac4ec 7549 {
7550 unsigned int align, misalign;
7551
272c6793 7552 data_ref
7553 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7554 dataref_offset
7555 ? dataref_offset
44fc7854 7556 : build_int_cst (ref_type, 0));
f702e7d4 7557 align = DR_TARGET_ALIGNMENT (dr);
7558 if (alignment_support_scheme == dr_aligned)
7559 {
7560 gcc_assert (aligned_access_p (first_dr));
644ffefd 7561 misalign = 0;
7562 }
7563 else if (DR_MISALIGNMENT (first_dr) == -1)
7564 {
25f68d90 7565 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7566 misalign = 0;
7567 TREE_TYPE (data_ref)
7568 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 7569 align * BITS_PER_UNIT);
7570 }
7571 else
7572 {
7573 TREE_TYPE (data_ref)
7574 = build_aligned_type (TREE_TYPE (data_ref),
7575 TYPE_ALIGN (elem_type));
644ffefd 7576 misalign = DR_MISALIGNMENT (first_dr);
272c6793 7577 }
7578 if (dataref_offset == NULL_TREE
7579 && TREE_CODE (dataref_ptr) == SSA_NAME)
7580 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7581 align, misalign);
272c6793 7582 break;
be1ac4ec 7583 }
272c6793 7584 case dr_explicit_realign:
267d3070 7585 {
272c6793 7586 tree ptr, bump;
272c6793 7587
d88981fc 7588 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7589
7590 if (compute_in_loop)
7591 msq = vect_setup_realignment (first_stmt, gsi,
7592 &realignment_token,
7593 dr_explicit_realign,
7594 dataref_ptr, NULL);
7595
7596 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7597 ptr = copy_ssa_name (dataref_ptr);
7598 else
7599 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 7600 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7601 new_stmt = gimple_build_assign
7602 (ptr, BIT_AND_EXPR, dataref_ptr,
7603 build_int_cst
7604 (TREE_TYPE (dataref_ptr),
f702e7d4 7605 -(HOST_WIDE_INT) align));
7606 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7607 data_ref
7608 = build2 (MEM_REF, vectype, ptr,
44fc7854 7609 build_int_cst (ref_type, 0));
7610 vec_dest = vect_create_destination_var (scalar_dest,
7611 vectype);
7612 new_stmt = gimple_build_assign (vec_dest, data_ref);
7613 new_temp = make_ssa_name (vec_dest, new_stmt);
7614 gimple_assign_set_lhs (new_stmt, new_temp);
7615 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7616 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7617 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7618 msq = new_temp;
7619
d88981fc 7620 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7621 TYPE_SIZE_UNIT (elem_type));
d88981fc 7622 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7623 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7624 new_stmt = gimple_build_assign
7625 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 7626 build_int_cst
f702e7d4 7627 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 7628 ptr = copy_ssa_name (ptr, new_stmt);
7629 gimple_assign_set_lhs (new_stmt, ptr);
7630 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7631 data_ref
7632 = build2 (MEM_REF, vectype, ptr,
44fc7854 7633 build_int_cst (ref_type, 0));
272c6793 7634 break;
267d3070 7635 }
272c6793 7636 case dr_explicit_realign_optimized:
7637 {
7638 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7639 new_temp = copy_ssa_name (dataref_ptr);
7640 else
7641 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7642 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7643 new_stmt = gimple_build_assign
7644 (new_temp, BIT_AND_EXPR, dataref_ptr,
7645 build_int_cst (TREE_TYPE (dataref_ptr),
7646 -(HOST_WIDE_INT) align));
7647 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7648 data_ref
7649 = build2 (MEM_REF, vectype, new_temp,
7650 build_int_cst (ref_type, 0));
7651 break;
7652 }
7653 default:
7654 gcc_unreachable ();
7655 }
ebfd146a 7656 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 7657 new_stmt = gimple_build_assign (vec_dest, data_ref);
7658 new_temp = make_ssa_name (vec_dest, new_stmt);
7659 gimple_assign_set_lhs (new_stmt, new_temp);
7660 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7661
7662 /* 3. Handle explicit realignment if necessary/supported.
7663 Create in loop:
7664 vec_dest = realign_load (msq, lsq, realignment_token) */
7665 if (alignment_support_scheme == dr_explicit_realign_optimized
7666 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7667 {
7668 lsq = gimple_assign_lhs (new_stmt);
7669 if (!realignment_token)
7670 realignment_token = dataref_ptr;
7671 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7672 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7673 msq, lsq, realignment_token);
7674 new_temp = make_ssa_name (vec_dest, new_stmt);
7675 gimple_assign_set_lhs (new_stmt, new_temp);
7676 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7677
7678 if (alignment_support_scheme == dr_explicit_realign_optimized)
7679 {
7680 gcc_assert (phi);
7681 if (i == vec_num - 1 && j == ncopies - 1)
7682 add_phi_arg (phi, lsq,
7683 loop_latch_edge (containing_loop),
9e227d60 7684 UNKNOWN_LOCATION);
7685 msq = lsq;
7686 }
ebfd146a 7687 }
ebfd146a 7688
7689 /* 4. Handle invariant-load. */
7690 if (inv_p && !bb_vinfo)
7691 {
59fd17e3 7692 gcc_assert (!grouped_load);
7693 /* If we have versioned for aliasing or the loop doesn't
7694 have any data dependencies that would preclude this,
7695 then we are sure this is a loop invariant load and
7696 thus we can insert it on the preheader edge. */
7697 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7698 && !nested_in_vect_loop
6b916b36 7699 && hoist_defs_of_uses (stmt, loop))
7700 {
7701 if (dump_enabled_p ())
7702 {
7703 dump_printf_loc (MSG_NOTE, vect_location,
7704 "hoisting out of the vectorized "
7705 "loop: ");
7706 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7707 }
b731b390 7708 tree tem = copy_ssa_name (scalar_dest);
7709 gsi_insert_on_edge_immediate
7710 (loop_preheader_edge (loop),
7711 gimple_build_assign (tem,
7712 unshare_expr
7713 (gimple_assign_rhs1 (stmt))));
7714 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7715 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7716 set_vinfo_for_stmt (new_stmt,
7717 new_stmt_vec_info (new_stmt, vinfo));
7718 }
7719 else
7720 {
7721 gimple_stmt_iterator gsi2 = *gsi;
7722 gsi_next (&gsi2);
7723 new_temp = vect_init_vector (stmt, scalar_dest,
7724 vectype, &gsi2);
34cd48e5 7725 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7726 }
7727 }
7728
62da9e14 7729 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 7730 {
7731 tree perm_mask = perm_mask_for_reverse (vectype);
7732 new_temp = permute_vec_elements (new_temp, new_temp,
7733 perm_mask, stmt, gsi);
7734 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7735 }
267d3070 7736
272c6793 7737 /* Collect vector loads and later create their permutation in
7738 vect_transform_grouped_load (). */
7739 if (grouped_load || slp_perm)
9771b263 7740 dr_chain.quick_push (new_temp);
267d3070 7741
7742 /* Store vector loads in the corresponding SLP_NODE. */
7743 if (slp && !slp_perm)
9771b263 7744 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7745
7746 /* With SLP permutation we load the gaps as well; without it
7747 we would need to skip the gaps after we manage to fully load
7748 all elements. group_gap_adj is GROUP_SIZE here. */
7749 group_elt += nunits;
7750 if (group_gap_adj != 0 && ! slp_perm
7751 && group_elt == group_size - group_gap_adj)
7752 {
7753 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7754 * group_gap_adj);
7755 tree bump = wide_int_to_tree (sizetype, bump_val);
7756 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7757 stmt, bump);
7758 group_elt = 0;
7759 }
272c6793 7760 }
7761 /* Bump the vector pointer to account for a gap or for excess
7762 elements loaded for a permuted SLP load. */
b267968e 7763 if (group_gap_adj != 0 && slp_perm)
a64b9c26 7764 {
7765 wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7766 * group_gap_adj);
7767 tree bump = wide_int_to_tree (sizetype, bump_val);
7768 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7769 stmt, bump);
7770 }
7771 }
7772
7773 if (slp && !slp_perm)
7774 continue;
7775
7776 if (slp_perm)
7777 {
29afecdf 7778 unsigned n_perms;
01d8bf07 7779 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7780 slp_node_instance, false,
7781 &n_perms))
ebfd146a 7782 {
9771b263 7783 dr_chain.release ();
7784 return false;
7785 }
7786 }
7787 else
7788 {
0d0293ac 7789 if (grouped_load)
ebfd146a 7790 {
2de001ee 7791 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 7792 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7793 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7794 }
7795 else
7796 {
7797 if (j == 0)
7798 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7799 else
7800 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7801 prev_stmt_info = vinfo_for_stmt (new_stmt);
7802 }
7803 }
9771b263 7804 dr_chain.release ();
7805 }
7806
7807 return true;
7808}
7809
7810/* Function vect_is_simple_cond.
b8698a0f 7811
ebfd146a
IR
7812 Input:
7813 LOOP - the loop that is being vectorized.
7814 COND - Condition that is checked for simple use.
7815
7816 Output:
7817 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 7818 *DTS - The def types for the arguments of the comparison
e9e1d143 7819
7820 Returns whether COND can be vectorized. Checks whether
7821 condition operands are supportable using vect_is_simple_use. */
7822
87aab9b2 7823static bool
4fc5ebf1 7824vect_is_simple_cond (tree cond, vec_info *vinfo,
7825 tree *comp_vectype, enum vect_def_type *dts,
7826 tree vectype)
7827{
7828 tree lhs, rhs;
e9e1d143 7829 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7830
7831 /* Mask case. */
7832 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 7833 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7834 {
7835 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7836 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 7837 &dts[0], comp_vectype)
7838 || !*comp_vectype
7839 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7840 return false;
7841 return true;
7842 }
7843
ebfd146a
IR
7844 if (!COMPARISON_CLASS_P (cond))
7845 return false;
7846
7847 lhs = TREE_OPERAND (cond, 0);
7848 rhs = TREE_OPERAND (cond, 1);
7849
7850 if (TREE_CODE (lhs) == SSA_NAME)
7851 {
355fe088 7852 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 7853 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
7854 return false;
7855 }
4fc5ebf1
JG
7856 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7857 || TREE_CODE (lhs) == FIXED_CST)
7858 dts[0] = vect_constant_def;
7859 else
ebfd146a
IR
7860 return false;
7861
7862 if (TREE_CODE (rhs) == SSA_NAME)
7863 {
355fe088 7864 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 7865 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
7866 return false;
7867 }
4fc5ebf1
JG
7868 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7869 || TREE_CODE (rhs) == FIXED_CST)
7870 dts[1] = vect_constant_def;
7871 else
ebfd146a
IR
7872 return false;
7873
28b33016
IE
7874 if (vectype1 && vectype2
7875 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7876 return false;
7877
e9e1d143 7878 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
7879 /* Invariant comparison. */
7880 if (! *comp_vectype)
7881 {
7882 tree scalar_type = TREE_TYPE (lhs);
7883 /* If we can widen the comparison to match vectype do so. */
7884 if (INTEGRAL_TYPE_P (scalar_type)
7885 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7886 TYPE_SIZE (TREE_TYPE (vectype))))
7887 scalar_type = build_nonstandard_integer_type
7888 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7889 TYPE_UNSIGNED (scalar_type));
7890 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7891 }
7892
ebfd146a
IR
7893 return true;
7894}
7895
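/* Editor's note: an illustrative sketch, not part of the GCC sources.
   The invariant-comparison fallback above matters for a case like

     int a[N];
     short x, y;    (loop invariants)
     ... = a[i] + (x < y ? 1 : 2);

   Both comparison operands are external defs, so no vector type was
   recorded for them.  With vectype V4SI the scalar type "short" is
   first widened to a 4-byte integer type, so the comparison vector
   type gets the same number of subparts as the result vector rather
   than the V8HI an unwidened short comparison would give.  */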
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
	return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	       && reduc_def))
	return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "value used after loop.\n");
	  return false;
	}
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
			    &comp_vectype, &dts[0], vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
			   &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
			   &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  cond_code = TREE_CODE (cond_expr);
  if (!masked)
    {
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);
    }

  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
    {
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity (see the illustrative
	 note just after this selection).  */
      switch (cond_code)
	{
	case GT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  break;
	case GE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  break;
	case LT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case LE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case NE_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  break;
	case EQ_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
	  break;
	default:
	  return false;
	}
      cond_code = SSA_NAME;
    }

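  /* Editor's note: an illustrative check, not part of the GCC sources.
     For one-bit booleans a and b the selected bit operations agree
     with the comparisons on all four input combinations:

       a > b   <->  a & ~b    (bitop1 = NOT, bitop2 = AND; the unary
			       NOT is applied to the second operand)
       a >= b  <->  a | ~b
       a < b   <->  b & ~a    (operands swapped first)
       a <= b  <->  b | ~a
       a != b  <->  a ^ b
       a == b  <->  ~(a ^ b)

     E.g. for GT: the only combination with a & ~b == 1 is a == 1 and
     b == 0, exactly the one combination where a > b holds.  */
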
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      if (bitop1 != NOP_EXPR)
	{
	  machine_mode mode = TYPE_MODE (comp_vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, comp_vectype,
					   optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}
      if (expand_vec_cond_expr_p (vectype, comp_vectype,
				  cond_code))
	{
	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
	  return true;
	}
      return false;
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      if (masked)
		ops.safe_push (cond_expr);
	      else
		{
		  ops.safe_push (cond_expr0);
		  ops.safe_push (cond_expr1);
		}
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      if (!masked)
		vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      gimple *gtemp;
	      if (masked)
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr, stmt,
						    comp_vectype);
		  vect_is_simple_use (cond_expr, stmt_info->vinfo,
				      &gtemp, &dts[0]);
		}
	      else
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr0,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);

		  vec_cond_rhs
		    = vect_get_vec_def_for_operand (cond_expr1,
						    stmt, comp_vectype);
		  vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
		}
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt);
		  vect_is_simple_use (then_clause, loop_vinfo,
				      &gtemp, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt);
		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs
	    = vect_get_vec_def_for_stmt_copy (dts[0],
					      vec_oprnds0.pop ());
	  if (!masked)
	    vec_cond_rhs
	      = vect_get_vec_def_for_stmt_copy (dts[1],
						vec_oprnds1.pop ());

	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  if (!masked)
	    vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  if (masked)
	    vec_compare = vec_cond_lhs;
	  else
	    {
	      vec_cond_rhs = vec_oprnds1[i];
	      if (bitop1 == NOP_EXPR)
		vec_compare = build2 (cond_code, vec_cmp_type,
				      vec_cond_lhs, vec_cond_rhs);
	      else
		{
		  new_temp = make_ssa_name (vec_cmp_type);
		  if (bitop1 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (new_temp, bitop1,
						    vec_cond_rhs);
		  else
		    new_stmt
		      = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
					     vec_cond_rhs);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (bitop2 == NOP_EXPR)
		    vec_compare = new_temp;
		  else if (bitop2 == BIT_NOT_EXPR)
		    {
		      /* Instead of doing ~x ? y : z do x ? z : y.  */
		      vec_compare = new_temp;
		      std::swap (vec_then_clause, vec_else_clause);
		    }
		  else
		    {
		      vec_compare = make_ssa_name (vec_cmp_type);
		      new_stmt
			= gimple_build_assign (vec_compare, bitop2,
					       vec_cond_lhs, new_temp);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    }
		}
	    }
	  new_temp = make_ssa_name (vec_dest);
	  new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
					  vec_compare, vec_then_clause,
					  vec_else_clause);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}

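/* Editor's note: an illustrative sketch, not part of the GCC sources.
   For a scalar statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   the transform above emits, with V4SI vector operands and a
   same-sized truth vector type,

     mask_6 = vect_a < vect_b;
     vect_x = VEC_COND_EXPR <mask_6, vect_c, vect_d>;

   When the condition is already a mask SSA name ("masked" above), the
   comparison step is skipped and the mask feeds the VEC_COND_EXPR
   directly.  */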
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, tree reduc_def,
			 slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  unsigned nunits;
  int ncopies;
  enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
			   &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
			   &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
	return false;
    }
  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      if (code == GT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	}
      else if (code == GE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	}
      else if (code == LT_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else if (code == LE_EXPR)
	{
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (rhs1, rhs2);
	  std::swap (dts[0], dts[1]);
	}
      else
	{
	  bitop1 = BIT_XOR_EXPR;
	  if (code == EQ_EXPR)
	    bitop2 = BIT_NOT_EXPR;
	}
    }

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
			      dts, ndts, NULL, NULL);
      if (bitop1 == NOP_EXPR)
	return expand_vec_cmp_expr_p (vectype, mask_type, code);
      else
	{
	  machine_mode mode = TYPE_MODE (vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, vectype, optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	  return true;
	}
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 2> ops;
	      auto_vec<vec<tree>, 2> vec_defs;

	      ops.safe_push (rhs1);
	      ops.safe_push (rhs2);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
	      vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
	    }
	}
      else
	{
	  vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
						     vec_oprnds0.pop ());
	  vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
						     vec_oprnds1.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_rhs1);
	  vec_oprnds1.quick_push (vec_rhs2);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
	{
	  vec_rhs2 = vec_oprnds1[i];

	  new_temp = make_ssa_name (mask);
	  if (bitop1 == NOP_EXPR)
	    {
	      new_stmt = gimple_build_assign (new_temp, code,
					      vec_rhs1, vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  else
	    {
	      if (bitop1 == BIT_NOT_EXPR)
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
	      else
		new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
						vec_rhs2);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (bitop2 != NOP_EXPR)
		{
		  tree res = make_ssa_name (mask);
		  if (bitop2 == BIT_NOT_EXPR)
		    new_stmt = gimple_build_assign (res, bitop2, new_temp);
		  else
		    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
						    new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}

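/* Editor's note: an illustrative sketch, not part of the GCC sources.
   The cost model call above charges ncopies * (1 + (bitop2 != NOP_EXPR))
   simple statements because a mask comparison may lower to one or two
   bit operations.  For boolean operands,

     p_3 = a_1 == b_2;

   becomes the two statements

     t_4 = a_1 ^ b_2;
     p_3 = ~t_4;

   whereas p_3 = a_1 != b_2 needs only the single XOR.  */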
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

static bool
can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, gimple **vec_stmt)
{
  if (slp_node)
    {
      gimple *slp_stmt;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
	{
	  stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
					       vec_stmt))
	    return false;
	}
    }
  else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
	   && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
    return false;

  return true;
}

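/* Editor's note: an illustrative sketch, not part of the GCC sources.
   A statement is "live" when its scalar result is used outside the
   loop being vectorized:

     for (i = 0; i < n; i++)
       last = a[i];
     use (last);

   Here the definition of "last" is live; after vectorization the
   scalar value is typically recovered from the final vector copy with
   a lane extraction such as

     last = BIT_FIELD_REF <vect_last, elt_bits, (nunits - 1) * elt_bits>;  */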
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
		   slp_instance node_instance)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; don't analyze pattern stmts instead, the pattern stmts
     will already be part of an SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
			      node_instance))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node, node_instance))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
	  || vectorizable_induction (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	  || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	      || vectorizable_conversion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_call (stmt, NULL, NULL, node)
	      || vectorizable_store (stmt, NULL, NULL, node)
	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
	      || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}

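/* Editor's note: an illustrative sketch, not part of the GCC sources.
   An example of the pattern-statement handling above: for

     short a_1, b_2;
     int t_3;
     t_3 = (int) a_1 * (int) b_2;

   pattern recognition may have attached a pattern statement

     t_3' = a_1 w* b_2;   (WIDEN_MULT_EXPR, in dump notation)

   If the original statement is not relevant on its own, the pattern
   statement is analyzed in its place; otherwise both are analyzed.  */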

/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
				     slp_node_instance);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}

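/* Editor's note: an illustrative sketch, not part of the GCC sources.
   The *grouped_store flag above implements deferred emission for
   interleaved stores.  Given the chain

     a[2*i]     = x;    <- nothing emitted when first visited
     a[2*i + 1] = y;    <- whole chain vectorized here

   the first member of the chain is skipped; when the last member is
   reached, all members are vectorized together (typically as an
   interleave followed by wide stores), which is why the earlier
   stmt_vec_infos must stay around until then.  */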

/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  int nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
    return NULL_TREE;
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (nunits < 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}

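/* Editor's note: an illustrative sketch, not part of the GCC sources.
   Worked example of the size arithmetic above: for scalar_type = int
   (inner_mode SImode, nbytes = 4) and size = 16 bytes,

     mode_for_vector (SImode, 16 / 4)  ->  V4SImode
     nunits = GET_MODE_SIZE (V4SImode) / 4 = 4

   so the function returns a 4-element int vector type.  With size == 0
   the target's preferred SIMD mode decides instead, e.g. a V8SImode
   result on a target that prefers 256-bit vectors.  */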

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}

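/* Editor's note: an illustrative sketch, not part of the GCC sources.
   With current_vector_size == 16 and scalar_type = int, the vector
   type has 4 subparts, so the mask type is a 4-element truth vector.
   Its representation is target-dependent: it may be a vector of
   4-byte 0/-1 elements on targets without mask registers, or a
   genuine predicate type where the target provides one.  */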
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

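/* Editor's note: an illustrative sketch, not part of the GCC sources.
   Typical classifications produced by vect_is_simple_use for

     void foo (int *a, int n, int k)
     {
       for (int i = 0; i < n; i++)
	 a[i] = a[i] + k;
     }

   a literal constant such as the "1" in the increment of "i" is
   vect_constant_def, the parameter "k" (an SSA default def from
   outside the loop) is vect_external_def, the loaded value of a[i]
   is vect_internal_def, and the loop-header PHI of "i" is
   vect_induction_def.  */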
ebfd146a
IR
9413
9414/* Function supportable_widening_operation
9415
b8698a0f
L
9416 Check whether an operation represented by the code CODE is a
9417 widening operation that is supported by the target platform in
b690cc0f
RG
9418 vector form (i.e., when operating on arguments of type VECTYPE_IN
9419 producing a result of type VECTYPE_OUT).
b8698a0f 9420
ebfd146a
IR
9421 Widening operations we currently support are NOP (CONVERT), FLOAT
9422 and WIDEN_MULT. This function checks if these operations are supported
9423 by the target platform either directly (via vector tree-codes), or via
9424 target builtins.
9425
9426 Output:
b8698a0f
L
9427 - CODE1 and CODE2 are codes of vector operations to be used when
9428 vectorizing the operation, if available.
ebfd146a
IR
9429 - MULTI_STEP_CVT determines the number of required intermediate steps in
9430 case of multi-step conversion (like char->short->int - in that case
9431 MULTI_STEP_CVT will be 1).
b8698a0f
L
9432 - INTERM_TYPES contains the intermediate type required to perform the
9433 widening operation (short in the above example). */
ebfd146a
IR
9434
9435bool
355fe088 9436supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 9437 tree vectype_out, tree vectype_in,
ebfd146a
IR
9438 enum tree_code *code1, enum tree_code *code2,
9439 int *multi_step_cvt,
9771b263 9440 vec<tree> *interm_types)
ebfd146a
IR
9441{
9442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9443 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 9444 struct loop *vect_loop = NULL;
ef4bddc2 9445 machine_mode vec_mode;
81f40b79 9446 enum insn_code icode1, icode2;
ebfd146a 9447 optab optab1, optab2;
b690cc0f
RG
9448 tree vectype = vectype_in;
9449 tree wide_vectype = vectype_out;
ebfd146a 9450 enum tree_code c1, c2;
4a00c761
JJ
9451 int i;
9452 tree prev_type, intermediate_type;
ef4bddc2 9453 machine_mode intermediate_mode, prev_mode;
4a00c761 9454 optab optab3, optab4;
ebfd146a 9455
4a00c761 9456 *multi_step_cvt = 0;
4ef69dfc
IR
9457 if (loop_info)
9458 vect_loop = LOOP_VINFO_LOOP (loop_info);
9459
ebfd146a
IR
9460 switch (code)
9461 {
9462 case WIDEN_MULT_EXPR:
6ae6116f
RH
9463 /* The result of a vectorized widening operation usually requires
9464 two vectors (because the widened results do not fit into one vector).
9465 The generated vector results would normally be expected to be
9466 generated in the same order as in the original scalar computation,
9467 i.e. if 8 results are generated in each vector iteration, they are
9468 to be organized as follows:
9469 vect1: [res1,res2,res3,res4],
9470 vect2: [res5,res6,res7,res8].
9471
9472 However, in the special case that the result of the widening
9473 operation is used in a reduction computation only, the order doesn't
9474 matter (because when vectorizing a reduction we change the order of
9475 the computation). Some targets can take advantage of this and
9476 generate more efficient code. For example, targets like Altivec,
9477 that support widen_mult using a sequence of {mult_even,mult_odd}
9478 generate the following vectors:
9479 vect1: [res1,res3,res5,res7],
9480 vect2: [res2,res4,res6,res8].
9481
9482 When vectorizing outer-loops, we execute the inner-loop sequentially
9483 (each vectorized inner-loop iteration contributes to VF outer-loop
9484 iterations in parallel). We therefore don't allow to change the
9485 order of the computation in the inner-loop during outer-loop
9486 vectorization. */
9487 /* TODO: Another case in which order doesn't *really* matter is when we
9488 widen and then contract again, e.g. (short)((int)x * y >> 8).
9489 Normally, pack_trunc performs an even/odd permute, whereas the
9490 repack from an even/odd expansion would be an interleave, which
9491 would be significantly simpler for e.g. AVX2. */
9492 /* In any case, in order to avoid duplicating the code below, recurse
9493 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9494 are properly set up for the caller. If we fail, we'll continue with
9495 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
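
/* A minimal caller-side sketch (an illustration added for this writeup,
   not part of the vectorizer): it shows how the API above might be
   queried for a widening multiply.  The function name and the way the
   outputs are consumed are assumptions for the example; only
   supportable_widening_operation itself is taken from this file.  */

static bool
example_widen_mult_supported_p (gimple *stmt, tree vectype_out,
                                tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  bool ok = supportable_widening_operation (WIDEN_MULT_EXPR, stmt,
                                            vectype_out, vectype_in,
                                            &code1, &code2, &multi_step_cvt,
                                            &interm_types);
  /* On success, CODE1/CODE2 hold the LO/HI (or EVEN/ODD) vector tree
     codes and MULTI_STEP_CVT counts the intermediate conversions whose
     types were pushed onto INTERM_TYPES.  A real caller would keep
     INTERM_TYPES alive while generating the widened statements.  */
  interm_types.release ();
  return ok;
}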

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
     conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
     more costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
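
/* A matching sketch for the narrowing direction (again an illustration
   with a hypothetical function name, not part of the vectorizer):
   checking whether a conversion such as int->char, which may need an
   intermediate step through short, is supported.  */

static bool
example_narrow_conversion_supported_p (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_out,
                                             vectype_in, &code1,
                                             &multi_step_cvt, &interm_types);
  /* For an int->char conversion, CODE1 would typically be
     VEC_PACK_TRUNC_EXPR with MULTI_STEP_CVT == 1 and the short vector
     type recorded in INTERM_TYPES, matching the int->short->char
     example in the comment above.  */
  interm_types.release ();
  return ok;
}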