]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-stmts.c
poly_int: vectorizable_call
[thirdparty/gcc.git] / gcc / tree-vect-stmts.c
CommitLineData
ebfd146a 1/* Statement Analysis and Transformation for Vectorization
cbe34bb5 2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
957060b5 35#include "dumpfile.h"
c7131fb2 36#include "alias.h"
40e23961 37#include "fold-const.h"
d8a2d370 38#include "stor-layout.h"
2fb9a547 39#include "tree-eh.h"
45b0be94 40#include "gimplify.h"
5be5c238 41#include "gimple-iterator.h"
18f429e2 42#include "gimplify-me.h"
442b4905 43#include "tree-cfg.h"
e28030cf 44#include "tree-ssa-loop-manip.h"
ebfd146a 45#include "cfgloop.h"
0136f8f0
AH
46#include "tree-ssa-loop.h"
47#include "tree-scalar-evolution.h"
ebfd146a 48#include "tree-vectorizer.h"
9b2b7279 49#include "builtins.h"
70439f0d 50#include "internal-fn.h"
5ebaa477 51#include "tree-vector-builder.h"
f151c9e1 52#include "vec-perm-indices.h"
ebfd146a 53
7ee2468b
SB
54/* For lang_hooks.types.type_for_mode. */
55#include "langhooks.h"
ebfd146a 56
2de001ee
RS
57/* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59enum vec_load_store_type {
60 VLS_LOAD,
61 VLS_STORE,
62 VLS_STORE_INVARIANT
63};
64
c3e7ee41
BS
65/* Return the vectorized type for the given statement. */
66
67tree
68stmt_vectype (struct _stmt_vec_info *stmt_info)
69{
70 return STMT_VINFO_VECTYPE (stmt_info);
71}
72
73/* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75bool
76stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
77{
355fe088 78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
c3e7ee41
BS
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
81 struct loop* loop;
82
83 if (!loop_vinfo)
84 return false;
85
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
87
88 return (bb->loop_father == loop->inner);
89}
90
91/* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
94
95unsigned
92345349 96record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
c3e7ee41 97 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92345349 98 int misalign, enum vect_cost_model_location where)
c3e7ee41 99{
cc9fe6bb
JH
100 if ((kind == vector_load || kind == unaligned_load)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
105 kind = vector_scatter_store;
92345349 106 if (body_cost_vec)
c3e7ee41 107 {
92345349 108 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
ddf56386
RB
109 stmt_info_for_cost si = { count, kind,
110 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
111 misalign };
112 body_cost_vec->safe_push (si);
c3e7ee41 113 return (unsigned)
92345349 114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
c3e7ee41
BS
115 }
116 else
310213d4
RB
117 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
118 count, kind, stmt_info, misalign, where);
c3e7ee41
BS
119}
120
272c6793
RS
121/* Return a variable of type ELEM_TYPE[NELEMS]. */
122
123static tree
124create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
125{
126 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
127 "vect_array");
128}
129
130/* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
134
135static tree
355fe088 136read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
272c6793
RS
137 tree array, unsigned HOST_WIDE_INT n)
138{
139 tree vect_type, vect, vect_name, array_ref;
355fe088 140 gimple *new_stmt;
272c6793
RS
141
142 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
143 vect_type = TREE_TYPE (TREE_TYPE (array));
144 vect = vect_create_destination_var (scalar_dest, vect_type);
145 array_ref = build4 (ARRAY_REF, vect_type, array,
146 build_int_cst (size_type_node, n),
147 NULL_TREE, NULL_TREE);
148
149 new_stmt = gimple_build_assign (vect, array_ref);
150 vect_name = make_ssa_name (vect, new_stmt);
151 gimple_assign_set_lhs (new_stmt, vect_name);
152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
153
154 return vect_name;
155}
156
157/* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
160
161static void
355fe088 162write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
272c6793
RS
163 tree array, unsigned HOST_WIDE_INT n)
164{
165 tree array_ref;
355fe088 166 gimple *new_stmt;
272c6793
RS
167
168 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
171
172 new_stmt = gimple_build_assign (array_ref, vect);
173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
174}
175
176/* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
178 (and its group). */
179
180static tree
44fc7854 181create_array_ref (tree type, tree ptr, tree alias_ptr_type)
272c6793 182{
44fc7854 183 tree mem_ref;
272c6793 184
272c6793
RS
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
644ffefd 187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
272c6793
RS
188 return mem_ref;
189}
190
ebfd146a
IR
191/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192
193/* Function vect_mark_relevant.
194
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196
197static void
355fe088 198vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
97ecdb46 199 enum vect_relevant relevant, bool live_p)
ebfd146a
IR
200{
201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
202 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
203 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
355fe088 204 gimple *pattern_stmt;
ebfd146a 205
73fbfcad 206 if (dump_enabled_p ())
66c16fd9
RB
207 {
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: ", relevant, live_p);
210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 }
ebfd146a 212
83197f37
IR
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern, in such cases the
216 stmt itself should be marked. */
ebfd146a
IR
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 {
97ecdb46
JJ
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
83197f37 223
97ecdb46
JJ
224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
225
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE, vect_location,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info = vinfo_for_stmt (pattern_stmt);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 stmt = pattern_stmt;
ebfd146a
IR
235 }
236
237 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
238 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
239 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240
241 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 {
73fbfcad 244 if (dump_enabled_p ())
78c60e3d 245 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 246 "already marked relevant/live.\n");
ebfd146a
IR
247 return;
248 }
249
9771b263 250 worklist->safe_push (stmt);
ebfd146a
IR
251}
252
253
b28ead45
AH
254/* Function is_simple_and_all_uses_invariant
255
256 Return true if STMT is simple and all uses of it are invariant. */
257
258bool
259is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
260{
261 tree op;
262 gimple *def_stmt;
263 ssa_op_iter iter;
264
265 if (!is_gimple_assign (stmt))
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284}
285
ebfd146a
IR
286/* Function vect_stmt_relevant_p.
287
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298static bool
355fe088 299vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
ebfd146a
IR
300 enum vect_relevant *relevant, bool *live_p)
301{
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
8644a673 308 *relevant = vect_unused_in_scope;
ebfd146a
IR
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
b8698a0f
L
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
8644a673 315 *relevant = vect_used_in_scope;
ebfd146a
IR
316
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
ebfd146a 321 {
73fbfcad 322 if (dump_enabled_p ())
78c60e3d 323 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 324 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 325 *relevant = vect_used_in_scope;
ebfd146a
IR
326 }
327
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 {
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 {
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
335 {
73fbfcad 336 if (dump_enabled_p ())
78c60e3d 337 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 338 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 339
3157b0c2
AO
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
342
ebfd146a
IR
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
347
348 *live_p = true;
349 }
350 }
351 }
352
3a2edf4c
AH
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
b28ead45
AH
355 {
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
360 }
361
ebfd146a
IR
362 return (*live_p || *relevant);
363}
364
365
b8698a0f 366/* Function exist_non_indexing_operands_for_use_p
ebfd146a 367
ff802fa1 368 USE is one of the uses attached to STMT. Check if USE is
ebfd146a
IR
369 used in STMT for anything other than indexing an array. */
370
371static bool
355fe088 372exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
ebfd146a
IR
373{
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
59a05b0c 376
ff802fa1 377 /* USE corresponds to some operand in STMT. If there is no data
ebfd146a
IR
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
59a05b0c 382
ebfd146a
IR
383 /* STMT has a data_ref. FORNOW this means that its of one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
388
389 'var' in the second case corresponds to a def, not a use,
b8698a0f 390 so USE cannot correspond to any operands that are not used
ebfd146a
IR
391 for array indexing.
392
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
ebfd146a
IR
395
396 if (!gimple_assign_copy_p (stmt))
5ce9450f
JJ
397 {
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
401 {
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
414 }
415 return false;
416 }
417
59a05b0c
EB
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
ebfd146a 420 operand = gimple_assign_rhs1 (stmt);
ebfd146a
IR
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
423
424 if (operand == use)
425 return true;
426
427 return false;
428}
429
430
b8698a0f 431/*
ebfd146a
IR
432 Function process_use.
433
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
b28ead45 436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
ff802fa1 437 that defined USE. This is done by calling mark_relevant and passing it
ebfd146a 438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
aec7ae7d
JJ
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
ebfd146a
IR
441
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
b8698a0f 449 which does not need to be directly vectorized, then the liveness/relevance
ebfd146a 450 of the respective DEF_STMT is left unchanged.
b8698a0f
L
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT cause it had already been processed.
ebfd146a
IR
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
455
456 Return true if everything is as expected. Return false otherwise. */
457
458static bool
b28ead45 459process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
355fe088 460 enum vect_relevant relevant, vec<gimple *> *worklist,
aec7ae7d 461 bool force)
ebfd146a
IR
462{
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
355fe088 467 gimple *def_stmt;
ebfd146a
IR
468 enum vect_def_type dt;
469
b8698a0f 470 /* case 1: we are only interested in uses that need to be vectorized. Uses
ebfd146a 471 that are used for address computation are not considered relevant. */
aec7ae7d 472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
ebfd146a
IR
473 return true;
474
81c40241 475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
b8698a0f 476 {
73fbfcad 477 if (dump_enabled_p ())
78c60e3d 478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 479 "not vectorized: unsupported use in stmt.\n");
ebfd146a
IR
480 return false;
481 }
482
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
485
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
488 {
73fbfcad 489 if (dump_enabled_p ())
e645e942 490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
ebfd146a
IR
491 return true;
492 }
493
b8698a0f
L
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
ebfd146a
IR
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
506 {
73fbfcad 507 if (dump_enabled_p ())
78c60e3d 508 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 509 "reduc-stmt defining reduc-phi in the same nest.\n");
ebfd146a
IR
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
b8698a0f 513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
8644a673 514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
ebfd146a
IR
515 return true;
516 }
517
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
526 {
73fbfcad 527 if (dump_enabled_p ())
78c60e3d 528 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 529 "outer-loop def-stmt defining inner-loop stmt.\n");
7c5222ff 530
ebfd146a
IR
531 switch (relevant)
532 {
8644a673 533 case vect_unused_in_scope:
7c5222ff
IR
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
ebfd146a 536 break;
7c5222ff 537
ebfd146a 538 case vect_used_in_outer_by_reduction:
7c5222ff 539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
ebfd146a
IR
540 relevant = vect_used_by_reduction;
541 break;
7c5222ff 542
ebfd146a 543 case vect_used_in_outer:
7c5222ff 544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
8644a673 545 relevant = vect_used_in_scope;
ebfd146a 546 break;
7c5222ff 547
8644a673 548 case vect_used_in_scope:
ebfd146a
IR
549 break;
550
551 default:
552 gcc_unreachable ();
b8698a0f 553 }
ebfd146a
IR
554 }
555
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
558 ...
559 inner-loop:
560 d = def_stmt
06066f92 561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
ebfd146a
IR
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
564 {
73fbfcad 565 if (dump_enabled_p ())
78c60e3d 566 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 567 "inner-loop def-stmt defining outer-loop stmt.\n");
7c5222ff 568
ebfd146a
IR
569 switch (relevant)
570 {
8644a673 571 case vect_unused_in_scope:
b8698a0f 572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
06066f92 573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
a70d6342 574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
ebfd146a
IR
575 break;
576
ebfd146a 577 case vect_used_by_reduction:
b28ead45 578 case vect_used_only_live:
ebfd146a
IR
579 relevant = vect_used_in_outer_by_reduction;
580 break;
581
8644a673 582 case vect_used_in_scope:
ebfd146a
IR
583 relevant = vect_used_in_outer;
584 break;
585
586 default:
587 gcc_unreachable ();
588 }
589 }
643a9684
RB
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
e294f495
RB
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
643a9684
RB
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
e294f495 596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
643a9684
RB
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
599 {
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
604 }
605
ebfd146a 606
b28ead45 607 vect_mark_relevant (worklist, def_stmt, relevant, false);
ebfd146a
IR
608 return true;
609}
610
611
612/* Function vect_mark_stmts_to_be_vectorized.
613
614 Not all stmts in the loop need to be vectorized. For example:
615
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
620
621 3. j = j + 1
622
623 Stmt 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
625
626 This pass detects such stmts. */
627
628bool
629vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630{
ebfd146a
IR
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
355fe088 635 gimple *stmt;
ebfd146a
IR
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
355fe088 639 gimple *phi;
ebfd146a 640 bool live_p;
b28ead45 641 enum vect_relevant relevant;
ebfd146a 642
73fbfcad 643 if (dump_enabled_p ())
78c60e3d 644 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 645 "=== vect_mark_stmts_to_be_vectorized ===\n");
ebfd146a 646
355fe088 647 auto_vec<gimple *, 64> worklist;
ebfd146a
IR
648
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
651 {
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
b8698a0f 654 {
ebfd146a 655 phi = gsi_stmt (si);
73fbfcad 656 if (dump_enabled_p ())
ebfd146a 657 {
78c60e3d
SS
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
660 }
661
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
97ecdb46 663 vect_mark_relevant (&worklist, phi, relevant, live_p);
ebfd146a
IR
664 }
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
666 {
667 stmt = gsi_stmt (si);
73fbfcad 668 if (dump_enabled_p ())
ebfd146a 669 {
78c60e3d
SS
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 672 }
ebfd146a
IR
673
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
97ecdb46 675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
ebfd146a
IR
676 }
677 }
678
679 /* 2. Process_worklist */
9771b263 680 while (worklist.length () > 0)
ebfd146a
IR
681 {
682 use_operand_p use_p;
683 ssa_op_iter iter;
684
9771b263 685 stmt = worklist.pop ();
73fbfcad 686 if (dump_enabled_p ())
ebfd146a 687 {
78c60e3d
SS
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
690 }
691
b8698a0f 692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
b28ead45
AH
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
ebfd146a
IR
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
ebfd146a 697
b28ead45
AH
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
ebfd146a
IR
700
701 One exception is when STMT has been identified as defining a reduction
b28ead45 702 variable; in this case we set the relevance to vect_used_by_reduction.
ebfd146a 703 This is because we distinguish between two kinds of relevant stmts -
b8698a0f 704 those that are used by a reduction computation, and those that are
ff802fa1 705 (also) used by a regular computation. This allows us later on to
b8698a0f 706 identify stmts that are used solely by a reduction, and therefore the
7c5222ff 707 order of the results that they produce does not have to be kept. */
ebfd146a 708
b28ead45 709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
ebfd146a 710 {
06066f92 711 case vect_reduction_def:
b28ead45
AH
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
06066f92 717 {
b28ead45
AH
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
06066f92 722 }
06066f92 723 break;
b8698a0f 724
06066f92 725 case vect_nested_cycle:
b28ead45
AH
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
06066f92 729 {
73fbfcad 730 if (dump_enabled_p ())
78c60e3d 731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 732 "unsupported use of nested cycle.\n");
7c5222ff 733
06066f92
IR
734 return false;
735 }
b8698a0f
L
736 break;
737
06066f92 738 case vect_double_reduction_def:
b28ead45
AH
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
06066f92 742 {
73fbfcad 743 if (dump_enabled_p ())
78c60e3d 744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 745 "unsupported use of double reduction.\n");
7c5222ff 746
7c5222ff 747 return false;
06066f92 748 }
b8698a0f 749 break;
7c5222ff 750
06066f92
IR
751 default:
752 break;
7c5222ff 753 }
b8698a0f 754
aec7ae7d 755 if (is_pattern_stmt_p (stmt_vinfo))
9d5e7640
IR
756 {
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
761 {
69d2aade
JJ
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
764
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
767 {
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
b28ead45 769 relevant, &worklist, false)
69d2aade 770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
b28ead45 771 relevant, &worklist, false))
566d377a 772 return false;
69d2aade
JJ
773 i = 2;
774 }
775 for (; i < gimple_num_ops (stmt); i++)
9d5e7640 776 {
69d2aade 777 op = gimple_op (stmt, i);
afbe6325 778 if (TREE_CODE (op) == SSA_NAME
b28ead45 779 && !process_use (stmt, op, loop_vinfo, relevant,
afbe6325 780 &worklist, false))
07687835 781 return false;
9d5e7640
IR
782 }
783 }
784 else if (is_gimple_call (stmt))
785 {
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
787 {
788 tree arg = gimple_call_arg (stmt, i);
b28ead45 789 if (!process_use (stmt, arg, loop_vinfo, relevant,
aec7ae7d 790 &worklist, false))
07687835 791 return false;
9d5e7640
IR
792 }
793 }
794 }
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
797 {
798 tree op = USE_FROM_PTR (use_p);
b28ead45 799 if (!process_use (stmt, op, loop_vinfo, relevant,
aec7ae7d 800 &worklist, false))
07687835 801 return false;
9d5e7640 802 }
aec7ae7d 803
3bab6342 804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
aec7ae7d 805 {
134c85ca
RS
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
566d377a 811 return false;
aec7ae7d 812 }
ebfd146a
IR
813 } /* while worklist */
814
ebfd146a
IR
815 return true;
816}
817
818
b8698a0f 819/* Function vect_model_simple_cost.
ebfd146a 820
b8698a0f 821 Models cost for simple operations, i.e. those that only emit ncopies of a
ebfd146a
IR
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
824
825void
b8698a0f 826vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
92345349 827 enum vect_def_type *dt,
4fc5ebf1 828 int ndts,
92345349
BS
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
831{
832 int i;
92345349 833 int inside_cost = 0, prologue_cost = 0;
ebfd146a
IR
834
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
838
4fc5ebf1
JG
839 /* Cost the "broadcast" of a scalar operand in to a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 cost model. */
842 for (i = 0; i < ndts; i++)
92345349 843 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
4fc5ebf1 844 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
92345349 845 stmt_info, 0, vect_prologue);
c3e7ee41
BS
846
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
92345349
BS
848 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
849 stmt_info, 0, vect_body);
c3e7ee41 850
73fbfcad 851 if (dump_enabled_p ())
78c60e3d
SS
852 dump_printf_loc (MSG_NOTE, vect_location,
853 "vect_model_simple_cost: inside_cost = %d, "
e645e942 854 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
855}
856
857
8bd37302
BS
858/* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
862
863static void
864vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
865 enum vect_def_type *dt, int pwr)
866{
867 int i, tmp;
92345349 868 int inside_cost = 0, prologue_cost = 0;
c3e7ee41
BS
869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
871 void *target_cost_data;
8bd37302
BS
872
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
876
c3e7ee41
BS
877 if (loop_vinfo)
878 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
879 else
880 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
881
8bd37302
BS
882 for (i = 0; i < pwr + 1; i++)
883 {
884 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
885 (i + 1) : i;
c3e7ee41 886 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
92345349
BS
887 vec_promote_demote, stmt_info, 0,
888 vect_body);
8bd37302
BS
889 }
890
891 /* FORNOW: Assuming maximum 2 args per stmts. */
892 for (i = 0; i < 2; i++)
92345349
BS
893 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
894 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
895 stmt_info, 0, vect_prologue);
8bd37302 896
73fbfcad 897 if (dump_enabled_p ())
78c60e3d
SS
898 dump_printf_loc (MSG_NOTE, vect_location,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 900 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
901}
902
ebfd146a
IR
903/* Function vect_model_store_cost
904
0d0293ac
MM
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
ebfd146a
IR
907
908void
b8698a0f 909vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
2de001ee
RS
910 vect_memory_access_type memory_access_type,
911 enum vect_def_type dt, slp_tree slp_node,
92345349
BS
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
ebfd146a 914{
92345349 915 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f
RS
916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
918 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 919
8644a673 920 if (dt == vect_constant_def || dt == vect_external_def)
92345349
BS
921 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
922 stmt_info, 0, vect_prologue);
ebfd146a 923
892a981f
RS
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node && grouped_access_p)
720f5239 927 {
892a981f
RS
928 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
720f5239 930 }
ebfd146a 931
892a981f
RS
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
936
272c6793 937 /* We assume that the cost of a single store-lanes instruction is
0d0293ac 938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
272c6793 939 access is instead being provided by a permute-and-store operation,
2de001ee
RS
940 include the cost of the permutes. */
941 if (first_stmt_p
942 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 943 {
e1377713
ES
944 /* Uses a high and low interleave or shuffle operations for each
945 needed permute. */
892a981f 946 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
e1377713 947 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
92345349
BS
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
ebfd146a 950
73fbfcad 951 if (dump_enabled_p ())
78c60e3d 952 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 953 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 954 group_size);
ebfd146a
IR
955 }
956
cee62fee 957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 958 /* Costs of the stores. */
067bc855
RB
959 if (memory_access_type == VMAT_ELEMENTWISE
960 || memory_access_type == VMAT_GATHER_SCATTER)
c5126ce8
RS
961 {
962 /* N scalar stores plus extracting the elements. */
963 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
964 inside_cost += record_stmt_cost (body_cost_vec,
965 ncopies * assumed_nunits,
966 scalar_store, stmt_info, 0, vect_body);
967 }
f2e2a985 968 else
892a981f 969 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
ebfd146a 970
2de001ee
RS
971 if (memory_access_type == VMAT_ELEMENTWISE
972 || memory_access_type == VMAT_STRIDED_SLP)
c5126ce8
RS
973 {
974 /* N scalar stores plus extracting the elements. */
975 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
976 inside_cost += record_stmt_cost (body_cost_vec,
977 ncopies * assumed_nunits,
978 vec_to_scalar, stmt_info, 0, vect_body);
979 }
cee62fee 980
73fbfcad 981 if (dump_enabled_p ())
78c60e3d
SS
982 dump_printf_loc (MSG_NOTE, vect_location,
983 "vect_model_store_cost: inside_cost = %d, "
e645e942 984 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
985}
986
987
720f5239
IR
988/* Calculate cost of DR's memory access. */
989void
990vect_get_store_cost (struct data_reference *dr, int ncopies,
c3e7ee41 991 unsigned int *inside_cost,
92345349 992 stmt_vector_for_cost *body_cost_vec)
720f5239
IR
993{
994 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 995 gimple *stmt = DR_STMT (dr);
c3e7ee41 996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
997
998 switch (alignment_support_scheme)
999 {
1000 case dr_aligned:
1001 {
92345349
BS
1002 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1003 vector_store, stmt_info, 0,
1004 vect_body);
720f5239 1005
73fbfcad 1006 if (dump_enabled_p ())
78c60e3d 1007 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1008 "vect_model_store_cost: aligned.\n");
720f5239
IR
1009 break;
1010 }
1011
1012 case dr_unaligned_supported:
1013 {
720f5239 1014 /* Here, we assign an additional cost for the unaligned store. */
92345349 1015 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1016 unaligned_store, stmt_info,
92345349 1017 DR_MISALIGNMENT (dr), vect_body);
73fbfcad 1018 if (dump_enabled_p ())
78c60e3d
SS
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: unaligned supported by "
e645e942 1021 "hardware.\n");
720f5239
IR
1022 break;
1023 }
1024
38eec4c6
UW
1025 case dr_unaligned_unsupported:
1026 {
1027 *inside_cost = VECT_MAX_COST;
1028
73fbfcad 1029 if (dump_enabled_p ())
78c60e3d 1030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1031 "vect_model_store_cost: unsupported access.\n");
38eec4c6
UW
1032 break;
1033 }
1034
720f5239
IR
1035 default:
1036 gcc_unreachable ();
1037 }
1038}
1039
1040
ebfd146a
IR
1041/* Function vect_model_load_cost
1042
892a981f
RS
1043 Models cost for loads. In the case of grouped accesses, one access has
1044 the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1045 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1046 access scheme chosen. */
1047
1048void
92345349 1049vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
2de001ee
RS
1050 vect_memory_access_type memory_access_type,
1051 slp_tree slp_node,
92345349
BS
1052 stmt_vector_for_cost *prologue_cost_vec,
1053 stmt_vector_for_cost *body_cost_vec)
ebfd146a 1054{
892a981f
RS
1055 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1056 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
92345349 1057 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f 1058 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 1059
892a981f
RS
1060 /* Grouped loads read all elements in the group at once,
1061 so we want the DR for the first statement. */
1062 if (!slp_node && grouped_access_p)
ebfd146a 1063 {
892a981f
RS
1064 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1065 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
ebfd146a
IR
1066 }
1067
892a981f
RS
1068 /* True if we should include any once-per-group costs as well as
1069 the cost of the statement itself. For SLP we only get called
1070 once per group anyhow. */
1071 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1072
272c6793 1073 /* We assume that the cost of a single load-lanes instruction is
0d0293ac 1074 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
272c6793 1075 access is instead being provided by a load-and-permute operation,
2de001ee
RS
1076 include the cost of the permutes. */
1077 if (first_stmt_p
1078 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1079 {
2c23db6d
ES
1080 /* Uses an even and odd extract operations or shuffle operations
1081 for each needed permute. */
892a981f 1082 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2c23db6d
ES
1083 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1084 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1085 stmt_info, 0, vect_body);
ebfd146a 1086
73fbfcad 1087 if (dump_enabled_p ())
e645e942
TJ
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1090 group_size);
ebfd146a
IR
1091 }
1092
1093 /* The loads themselves. */
067bc855
RB
1094 if (memory_access_type == VMAT_ELEMENTWISE
1095 || memory_access_type == VMAT_GATHER_SCATTER)
a82960aa 1096 {
a21892ad
BS
1097 /* N scalar loads plus gathering them into a vector. */
1098 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
c5126ce8 1099 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
92345349 1100 inside_cost += record_stmt_cost (body_cost_vec,
c5126ce8 1101 ncopies * assumed_nunits,
92345349 1102 scalar_load, stmt_info, 0, vect_body);
a82960aa
RG
1103 }
1104 else
892a981f 1105 vect_get_load_cost (dr, ncopies, first_stmt_p,
92345349
BS
1106 &inside_cost, &prologue_cost,
1107 prologue_cost_vec, body_cost_vec, true);
2de001ee
RS
1108 if (memory_access_type == VMAT_ELEMENTWISE
1109 || memory_access_type == VMAT_STRIDED_SLP)
892a981f
RS
1110 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1111 stmt_info, 0, vect_body);
720f5239 1112
73fbfcad 1113 if (dump_enabled_p ())
78c60e3d
SS
1114 dump_printf_loc (MSG_NOTE, vect_location,
1115 "vect_model_load_cost: inside_cost = %d, "
e645e942 1116 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1117}
1118
1119
1120/* Calculate cost of DR's memory access. */
1121void
1122vect_get_load_cost (struct data_reference *dr, int ncopies,
c3e7ee41 1123 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1124 unsigned int *prologue_cost,
1125 stmt_vector_for_cost *prologue_cost_vec,
1126 stmt_vector_for_cost *body_cost_vec,
1127 bool record_prologue_costs)
720f5239
IR
1128{
1129 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 1130 gimple *stmt = DR_STMT (dr);
c3e7ee41 1131 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
1132
1133 switch (alignment_support_scheme)
ebfd146a
IR
1134 {
1135 case dr_aligned:
1136 {
92345349
BS
1137 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1138 stmt_info, 0, vect_body);
ebfd146a 1139
73fbfcad 1140 if (dump_enabled_p ())
78c60e3d 1141 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1142 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1143
1144 break;
1145 }
1146 case dr_unaligned_supported:
1147 {
720f5239 1148 /* Here, we assign an additional cost for the unaligned load. */
92345349 1149 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1150 unaligned_load, stmt_info,
92345349 1151 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1152
73fbfcad 1153 if (dump_enabled_p ())
78c60e3d
SS
1154 dump_printf_loc (MSG_NOTE, vect_location,
1155 "vect_model_load_cost: unaligned supported by "
e645e942 1156 "hardware.\n");
ebfd146a
IR
1157
1158 break;
1159 }
1160 case dr_explicit_realign:
1161 {
92345349
BS
1162 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1163 vector_load, stmt_info, 0, vect_body);
1164 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1165 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1166
1167 /* FIXME: If the misalignment remains fixed across the iterations of
1168 the containing loop, the following cost should be added to the
92345349 1169 prologue costs. */
ebfd146a 1170 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1171 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1172 stmt_info, 0, vect_body);
ebfd146a 1173
73fbfcad 1174 if (dump_enabled_p ())
e645e942
TJ
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: explicit realign\n");
8bd37302 1177
ebfd146a
IR
1178 break;
1179 }
1180 case dr_explicit_realign_optimized:
1181 {
73fbfcad 1182 if (dump_enabled_p ())
e645e942 1183 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1184 "vect_model_load_cost: unaligned software "
e645e942 1185 "pipelined.\n");
ebfd146a
IR
1186
1187 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1188 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1189 if this is an access in a group of loads, which provide grouped
ebfd146a 1190 access, then the above cost should only be considered for one
ff802fa1 1191 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1192 and a realignment op. */
1193
92345349 1194 if (add_realign_cost && record_prologue_costs)
ebfd146a 1195 {
92345349
BS
1196 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1197 vector_stmt, stmt_info,
1198 0, vect_prologue);
ebfd146a 1199 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1200 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1201 vector_stmt, stmt_info,
1202 0, vect_prologue);
ebfd146a
IR
1203 }
1204
92345349
BS
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1206 stmt_info, 0, vect_body);
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1208 stmt_info, 0, vect_body);
8bd37302 1209
73fbfcad 1210 if (dump_enabled_p ())
78c60e3d 1211 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1212 "vect_model_load_cost: explicit realign optimized"
1213 "\n");
8bd37302 1214
ebfd146a
IR
1215 break;
1216 }
1217
38eec4c6
UW
1218 case dr_unaligned_unsupported:
1219 {
1220 *inside_cost = VECT_MAX_COST;
1221
73fbfcad 1222 if (dump_enabled_p ())
78c60e3d 1223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1224 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1225 break;
1226 }
1227
ebfd146a
IR
1228 default:
1229 gcc_unreachable ();
1230 }
ebfd146a
IR
1231}
1232
418b7df3
RG
1233/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1234 the loop preheader for the vectorized stmt STMT. */
ebfd146a 1235
418b7df3 1236static void
355fe088 1237vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
ebfd146a 1238{
ebfd146a 1239 if (gsi)
418b7df3 1240 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1241 else
1242 {
418b7df3 1243 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a 1244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
b8698a0f 1245
a70d6342
IR
1246 if (loop_vinfo)
1247 {
1248 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
418b7df3
RG
1249 basic_block new_bb;
1250 edge pe;
a70d6342
IR
1251
1252 if (nested_in_vect_loop_p (loop, stmt))
1253 loop = loop->inner;
b8698a0f 1254
a70d6342 1255 pe = loop_preheader_edge (loop);
418b7df3 1256 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
a70d6342
IR
1257 gcc_assert (!new_bb);
1258 }
1259 else
1260 {
1261 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1262 basic_block bb;
1263 gimple_stmt_iterator gsi_bb_start;
1264
1265 gcc_assert (bb_vinfo);
1266 bb = BB_VINFO_BB (bb_vinfo);
12aaf609 1267 gsi_bb_start = gsi_after_labels (bb);
418b7df3 1268 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
a70d6342 1269 }
ebfd146a
IR
1270 }
1271
73fbfcad 1272 if (dump_enabled_p ())
ebfd146a 1273 {
78c60e3d
SS
1274 dump_printf_loc (MSG_NOTE, vect_location,
1275 "created new init_stmt: ");
1276 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
ebfd146a 1277 }
418b7df3
RG
1278}
1279
1280/* Function vect_init_vector.
ebfd146a 1281
5467ee52
RG
1282 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1283 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1284 vector type a vector with all elements equal to VAL is created first.
1285 Place the initialization at BSI if it is not NULL. Otherwise, place the
1286 initialization at the loop preheader.
418b7df3
RG
1287 Return the DEF of INIT_STMT.
1288 It will be used in the vectorization of STMT. */
1289
1290tree
355fe088 1291vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
418b7df3 1292{
355fe088 1293 gimple *init_stmt;
418b7df3
RG
1294 tree new_temp;
1295
e412ece4
RB
1296 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1297 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
418b7df3 1298 {
e412ece4
RB
1299 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1300 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
418b7df3 1301 {
5a308cf1
IE
1302 /* Scalar boolean value should be transformed into
1303 all zeros or all ones value before building a vector. */
1304 if (VECTOR_BOOLEAN_TYPE_P (type))
1305 {
b3d51f23
IE
1306 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1307 tree false_val = build_zero_cst (TREE_TYPE (type));
5a308cf1
IE
1308
1309 if (CONSTANT_CLASS_P (val))
1310 val = integer_zerop (val) ? false_val : true_val;
1311 else
1312 {
1313 new_temp = make_ssa_name (TREE_TYPE (type));
1314 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1315 val, true_val, false_val);
1316 vect_init_vector_1 (stmt, init_stmt, gsi);
1317 val = new_temp;
1318 }
1319 }
1320 else if (CONSTANT_CLASS_P (val))
42fd8198 1321 val = fold_convert (TREE_TYPE (type), val);
418b7df3
RG
1322 else
1323 {
b731b390 1324 new_temp = make_ssa_name (TREE_TYPE (type));
e412ece4
RB
1325 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1326 init_stmt = gimple_build_assign (new_temp,
1327 fold_build1 (VIEW_CONVERT_EXPR,
1328 TREE_TYPE (type),
1329 val));
1330 else
1331 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
418b7df3 1332 vect_init_vector_1 (stmt, init_stmt, gsi);
5467ee52 1333 val = new_temp;
418b7df3
RG
1334 }
1335 }
5467ee52 1336 val = build_vector_from_val (type, val);
418b7df3
RG
1337 }
1338
0e22bb5a
RB
1339 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1340 init_stmt = gimple_build_assign (new_temp, val);
418b7df3 1341 vect_init_vector_1 (stmt, init_stmt, gsi);
0e22bb5a 1342 return new_temp;
ebfd146a
IR
1343}
1344
c83a894c 1345/* Function vect_get_vec_def_for_operand_1.
a70d6342 1346
c83a894c
AH
1347 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1348 DT that will be used in the vectorized stmt. */
ebfd146a
IR
1349
1350tree
c83a894c 1351vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
ebfd146a
IR
1352{
1353 tree vec_oprnd;
355fe088 1354 gimple *vec_stmt;
ebfd146a 1355 stmt_vec_info def_stmt_info = NULL;
ebfd146a
IR
1356
1357 switch (dt)
1358 {
81c40241 1359 /* operand is a constant or a loop invariant. */
ebfd146a 1360 case vect_constant_def:
81c40241 1361 case vect_external_def:
c83a894c
AH
1362 /* Code should use vect_get_vec_def_for_operand. */
1363 gcc_unreachable ();
ebfd146a 1364
81c40241 1365 /* operand is defined inside the loop. */
8644a673 1366 case vect_internal_def:
ebfd146a 1367 {
ebfd146a
IR
1368 /* Get the def from the vectorized stmt. */
1369 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1370
ebfd146a 1371 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1372 /* Get vectorized pattern statement. */
1373 if (!vec_stmt
1374 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1375 && !STMT_VINFO_RELEVANT (def_stmt_info))
1376 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1377 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1378 gcc_assert (vec_stmt);
1379 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1380 vec_oprnd = PHI_RESULT (vec_stmt);
1381 else if (is_gimple_call (vec_stmt))
1382 vec_oprnd = gimple_call_lhs (vec_stmt);
1383 else
1384 vec_oprnd = gimple_assign_lhs (vec_stmt);
1385 return vec_oprnd;
1386 }
1387
c78e3652 1388 /* operand is defined by a loop header phi. */
ebfd146a 1389 case vect_reduction_def:
06066f92 1390 case vect_double_reduction_def:
7c5222ff 1391 case vect_nested_cycle:
ebfd146a
IR
1392 case vect_induction_def:
1393 {
1394 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1395
1396 /* Get the def from the vectorized stmt. */
1397 def_stmt_info = vinfo_for_stmt (def_stmt);
1398 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1399 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1400 vec_oprnd = PHI_RESULT (vec_stmt);
1401 else
1402 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1403 return vec_oprnd;
1404 }
1405
1406 default:
1407 gcc_unreachable ();
1408 }
1409}
1410
1411
c83a894c
AH
1412/* Function vect_get_vec_def_for_operand.
1413
1414 OP is an operand in STMT. This function returns a (vector) def that will be
1415 used in the vectorized stmt for STMT.
1416
1417 In the case that OP is an SSA_NAME which is defined in the loop, then
1418 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1419
1420 In case OP is an invariant or constant, a new stmt that creates a vector def
1421 needs to be introduced. VECTYPE may be used to specify a required type for
1422 vector invariant. */
1423
1424tree
1425vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1426{
1427 gimple *def_stmt;
1428 enum vect_def_type dt;
1429 bool is_simple_use;
1430 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1431 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1432
1433 if (dump_enabled_p ())
1434 {
1435 dump_printf_loc (MSG_NOTE, vect_location,
1436 "vect_get_vec_def_for_operand: ");
1437 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1438 dump_printf (MSG_NOTE, "\n");
1439 }
1440
1441 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1442 gcc_assert (is_simple_use);
1443 if (def_stmt && dump_enabled_p ())
1444 {
1445 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1446 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1447 }
1448
1449 if (dt == vect_constant_def || dt == vect_external_def)
1450 {
1451 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1452 tree vector_type;
1453
1454 if (vectype)
1455 vector_type = vectype;
2568d8a1 1456 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1457 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1458 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1459 else
1460 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1461
1462 gcc_assert (vector_type);
1463 return vect_init_vector (stmt, op, vector_type, NULL);
1464 }
1465 else
1466 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1467}
1468
1469
ebfd146a
IR
1470/* Function vect_get_vec_def_for_stmt_copy
1471
ff802fa1 1472 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1473 vectorized stmt to be created (by the caller to this function) is a "copy"
1474 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1475 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1476 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1477 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1478 DT is the type of the vector def VEC_OPRND.
1479
1480 Context:
1481 In case the vectorization factor (VF) is bigger than the number
1482 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1483 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1484 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1485 smallest data-type determines the VF, and as a result, when vectorizing
1486 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1487 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1488 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1489 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1490 which VF=16 and nunits=4, so the number of copies required is 4):
1491
1492 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1493
ebfd146a
IR
1494 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1495 VS1.1: vx.1 = memref1 VS1.2
1496 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1497 VS1.3: vx.3 = memref3
ebfd146a
IR
1498
1499 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1500 VSnew.1: vz1 = vx.1 + ... VSnew.2
1501 VSnew.2: vz2 = vx.2 + ... VSnew.3
1502 VSnew.3: vz3 = vx.3 + ...
1503
1504 The vectorization of S1 is explained in vectorizable_load.
1505 The vectorization of S2:
b8698a0f
L
1506 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1507 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1508 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1509 returns the vector-def 'vx.0'.
1510
b8698a0f
L
1511 To create the remaining copies of the vector-stmt (VSnew.j), this
1512 function is called to get the relevant vector-def for each operand. It is
1513 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1514 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1515
b8698a0f
L
1516 For example, to obtain the vector-def 'vx.1' in order to create the
1517 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1518 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1519 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1520 and return its def ('vx.1').
1521 Overall, to create the above sequence this function will be called 3 times:
1522 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1523 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1524 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1525
1526tree
1527vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1528{
355fe088 1529 gimple *vec_stmt_for_operand;
ebfd146a
IR
1530 stmt_vec_info def_stmt_info;
1531
1532 /* Do nothing; can reuse same def. */
8644a673 1533 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1534 return vec_oprnd;
1535
1536 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1537 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1538 gcc_assert (def_stmt_info);
1539 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1540 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1541 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543 else
1544 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545 return vec_oprnd;
1546}
1547
1548
1549/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1550 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1551
c78e3652 1552void
b8698a0f 1553vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1554 vec<tree> *vec_oprnds0,
1555 vec<tree> *vec_oprnds1)
ebfd146a 1556{
9771b263 1557 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1558
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1560 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1561
9771b263 1562 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1563 {
9771b263 1564 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1565 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1566 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1567 }
1568}
1569
1570
c78e3652 1571/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1572
c78e3652 1573void
355fe088 1574vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1575 vec<tree> *vec_oprnds0,
1576 vec<tree> *vec_oprnds1,
306b0c92 1577 slp_tree slp_node)
ebfd146a
IR
1578{
1579 if (slp_node)
d092494c
IR
1580 {
1581 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1582 auto_vec<tree> ops (nops);
1583 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1584
9771b263 1585 ops.quick_push (op0);
d092494c 1586 if (op1)
9771b263 1587 ops.quick_push (op1);
d092494c 1588
306b0c92 1589 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1590
37b5ec8f 1591 *vec_oprnds0 = vec_defs[0];
d092494c 1592 if (op1)
37b5ec8f 1593 *vec_oprnds1 = vec_defs[1];
d092494c 1594 }
ebfd146a
IR
1595 else
1596 {
1597 tree vec_oprnd;
1598
9771b263 1599 vec_oprnds0->create (1);
81c40241 1600 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1601 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1602
1603 if (op1)
1604 {
9771b263 1605 vec_oprnds1->create (1);
81c40241 1606 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1607 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1608 }
1609 }
1610}
1611
1612
1613/* Function vect_finish_stmt_generation.
1614
1615 Insert a new stmt. */
1616
1617void
355fe088 1618vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1619 gimple_stmt_iterator *gsi)
1620{
1621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
310213d4 1622 vec_info *vinfo = stmt_info->vinfo;
ebfd146a
IR
1623
1624 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1625
54e8e2c3
RG
1626 if (!gsi_end_p (*gsi)
1627 && gimple_has_mem_ops (vec_stmt))
1628 {
355fe088 1629 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1630 tree vuse = gimple_vuse (at_stmt);
1631 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1632 {
1633 tree vdef = gimple_vdef (at_stmt);
1634 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1635 /* If we have an SSA vuse and insert a store, update virtual
1636 SSA form to avoid triggering the renamer. Do so only
1637 if we can easily see all uses - which is what almost always
1638 happens with the way vectorized stmts are inserted. */
1639 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1640 && ((is_gimple_assign (vec_stmt)
1641 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1642 || (is_gimple_call (vec_stmt)
1643 && !(gimple_call_flags (vec_stmt)
1644 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1645 {
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1649 }
1650 }
1651 }
ebfd146a
IR
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653
310213d4 1654 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
ebfd146a 1655
73fbfcad 1656 if (dump_enabled_p ())
ebfd146a 1657 {
78c60e3d
SS
1658 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1660 }
1661
ad885386 1662 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1663
1664 /* While EH edges will generally prevent vectorization, stmt might
1665 e.g. be in a must-not-throw region. Ensure newly created stmts
1666 that could throw are part of the same region. */
1667 int lp_nr = lookup_stmt_eh_lp (stmt);
1668 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1669 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1670}
1671
70439f0d
RS
1672/* We want to vectorize a call to combined function CFN with function
1673 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1674 as the types of all inputs. Check whether this is possible using
1675 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1676
70439f0d
RS
1677static internal_fn
1678vectorizable_internal_function (combined_fn cfn, tree fndecl,
1679 tree vectype_out, tree vectype_in)
ebfd146a 1680{
70439f0d
RS
1681 internal_fn ifn;
1682 if (internal_fn_p (cfn))
1683 ifn = as_internal_fn (cfn);
1684 else
1685 ifn = associated_internal_fn (fndecl);
1686 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1687 {
1688 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1689 if (info.vectorizable)
1690 {
1691 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1692 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1693 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1694 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1695 return ifn;
1696 }
1697 }
1698 return IFN_LAST;
ebfd146a
IR
1699}
1700
5ce9450f 1701
355fe088 1702static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1703 gimple_stmt_iterator *);
1704
62da9e14
RS
1705/* STMT is a non-strided load or store, meaning that it accesses
1706 elements with a known constant step. Return -1 if that step
1707 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1708
1709static int
1710compare_step_with_zero (gimple *stmt)
1711{
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1713 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1714 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1715 size_zero_node);
62da9e14
RS
1716}
1717
1718/* If the target supports a permute mask that reverses the elements in
1719 a vector of type VECTYPE, return that mask, otherwise return null. */
1720
1721static tree
1722perm_mask_for_reverse (tree vectype)
1723{
1724 int i, nunits;
62da9e14
RS
1725
1726 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 1727
d980067b
RS
1728 /* The encoding has a single stepped pattern. */
1729 vec_perm_builder sel (nunits, 1, 3);
1730 for (i = 0; i < 3; ++i)
908a1a16 1731 sel.quick_push (nunits - 1 - i);
62da9e14 1732
e3342de4
RS
1733 vec_perm_indices indices (sel, 1, nunits);
1734 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 1735 return NULL_TREE;
e3342de4 1736 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 1737}
5ce9450f 1738
2de001ee
RS
1739/* A subroutine of get_load_store_type, with a subset of the same
1740 arguments. Handle the case where STMT is part of a grouped load
1741 or store.
1742
1743 For stores, the statements in the group are all consecutive
1744 and there is no gap at the end. For loads, the statements in the
1745 group might not be consecutive; there can be gaps between statements
1746 as well as at the end. */
1747
1748static bool
1749get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1750 vec_load_store_type vls_type,
1751 vect_memory_access_type *memory_access_type)
1752{
1753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1754 vec_info *vinfo = stmt_info->vinfo;
1755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1756 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1757 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 1758 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
1759 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1760 bool single_element_p = (stmt == first_stmt
1761 && !GROUP_NEXT_ELEMENT (stmt_info));
1762 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
522fcdd7 1763 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1764
1765 /* True if the vectorized statements would access beyond the last
1766 statement in the group. */
1767 bool overrun_p = false;
1768
1769 /* True if we can cope with such overrun by peeling for gaps, so that
1770 there is at least one final scalar iteration after the vector loop. */
1771 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1772
1773 /* There can only be a gap at the end of the group if the stride is
1774 known at compile time. */
1775 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1776
1777 /* Stores can't yet have gaps. */
1778 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1779
1780 if (slp)
1781 {
1782 if (STMT_VINFO_STRIDED_P (stmt_info))
1783 {
1784 /* Try to use consecutive accesses of GROUP_SIZE elements,
1785 separated by the stride, until we have a complete vector.
1786 Fall back to scalar accesses if that isn't possible. */
1787 if (nunits % group_size == 0)
1788 *memory_access_type = VMAT_STRIDED_SLP;
1789 else
1790 *memory_access_type = VMAT_ELEMENTWISE;
1791 }
1792 else
1793 {
1794 overrun_p = loop_vinfo && gap != 0;
1795 if (overrun_p && vls_type != VLS_LOAD)
1796 {
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "Grouped store with gaps requires"
1799 " non-consecutive accesses\n");
1800 return false;
1801 }
f702e7d4
RS
1802 /* An overrun is fine if the trailing elements are smaller
1803 than the alignment boundary B. Every vector access will
1804 be a multiple of B and so we are guaranteed to access a
1805 non-gap element in the same B-sized block. */
f9ef2c76 1806 if (overrun_p
f702e7d4
RS
1807 && gap < (vect_known_alignment_in_bytes (first_dr)
1808 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1809 overrun_p = false;
2de001ee
RS
1810 if (overrun_p && !can_overrun_p)
1811 {
1812 if (dump_enabled_p ())
1813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1814 "Peeling for outer loop is not supported\n");
1815 return false;
1816 }
1817 *memory_access_type = VMAT_CONTIGUOUS;
1818 }
1819 }
1820 else
1821 {
1822 /* We can always handle this case using elementwise accesses,
1823 but see if something more efficient is available. */
1824 *memory_access_type = VMAT_ELEMENTWISE;
1825
1826 /* If there is a gap at the end of the group then these optimizations
1827 would access excess elements in the last iteration. */
1828 bool would_overrun_p = (gap != 0);
f702e7d4
RS
1829 /* An overrun is fine if the trailing elements are smaller than the
1830 alignment boundary B. Every vector access will be a multiple of B
1831 and so we are guaranteed to access a non-gap element in the
1832 same B-sized block. */
f9ef2c76 1833 if (would_overrun_p
f702e7d4
RS
1834 && gap < (vect_known_alignment_in_bytes (first_dr)
1835 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1836 would_overrun_p = false;
f702e7d4 1837
2de001ee 1838 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
1839 && (can_overrun_p || !would_overrun_p)
1840 && compare_step_with_zero (stmt) > 0)
2de001ee
RS
1841 {
1842 /* First try using LOAD/STORE_LANES. */
1843 if (vls_type == VLS_LOAD
1844 ? vect_load_lanes_supported (vectype, group_size)
1845 : vect_store_lanes_supported (vectype, group_size))
1846 {
1847 *memory_access_type = VMAT_LOAD_STORE_LANES;
1848 overrun_p = would_overrun_p;
1849 }
1850
1851 /* If that fails, try using permuting loads. */
1852 if (*memory_access_type == VMAT_ELEMENTWISE
1853 && (vls_type == VLS_LOAD
1854 ? vect_grouped_load_supported (vectype, single_element_p,
1855 group_size)
1856 : vect_grouped_store_supported (vectype, group_size)))
1857 {
1858 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1859 overrun_p = would_overrun_p;
1860 }
1861 }
1862 }
1863
1864 if (vls_type != VLS_LOAD && first_stmt == stmt)
1865 {
1866 /* STMT is the leader of the group. Check the operands of all the
1867 stmts of the group. */
1868 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1869 while (next_stmt)
1870 {
1871 gcc_assert (gimple_assign_single_p (next_stmt));
1872 tree op = gimple_assign_rhs1 (next_stmt);
1873 gimple *def_stmt;
1874 enum vect_def_type dt;
1875 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1876 {
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "use not simple.\n");
1880 return false;
1881 }
1882 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1883 }
1884 }
1885
1886 if (overrun_p)
1887 {
1888 gcc_assert (can_overrun_p);
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1891 "Data access with gaps requires scalar "
1892 "epilogue loop\n");
1893 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1894 }
1895
1896 return true;
1897}
1898
62da9e14
RS
1899/* A subroutine of get_load_store_type, with a subset of the same
1900 arguments. Handle the case where STMT is a load or store that
1901 accesses consecutive elements with a negative step. */
1902
1903static vect_memory_access_type
1904get_negative_load_store_type (gimple *stmt, tree vectype,
1905 vec_load_store_type vls_type,
1906 unsigned int ncopies)
1907{
1908 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1909 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1910 dr_alignment_support alignment_support_scheme;
1911
1912 if (ncopies > 1)
1913 {
1914 if (dump_enabled_p ())
1915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1916 "multiple types with negative step.\n");
1917 return VMAT_ELEMENTWISE;
1918 }
1919
1920 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1921 if (alignment_support_scheme != dr_aligned
1922 && alignment_support_scheme != dr_unaligned_supported)
1923 {
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1926 "negative step but alignment required.\n");
1927 return VMAT_ELEMENTWISE;
1928 }
1929
1930 if (vls_type == VLS_STORE_INVARIANT)
1931 {
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_NOTE, vect_location,
1934 "negative step with invariant source;"
1935 " no permute needed.\n");
1936 return VMAT_CONTIGUOUS_DOWN;
1937 }
1938
1939 if (!perm_mask_for_reverse (vectype))
1940 {
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1943 "negative step and reversing not supported.\n");
1944 return VMAT_ELEMENTWISE;
1945 }
1946
1947 return VMAT_CONTIGUOUS_REVERSE;
1948}
1949
2de001ee
RS
1950/* Analyze load or store statement STMT of type VLS_TYPE. Return true
1951 if there is a memory access type that the vectorized form can use,
1952 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1953 or scatters, fill in GS_INFO accordingly.
1954
1955 SLP says whether we're performing SLP rather than loop vectorization.
62da9e14
RS
1956 VECTYPE is the vector type that the vectorized statements will use.
1957 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
1958
1959static bool
1960get_load_store_type (gimple *stmt, tree vectype, bool slp,
62da9e14 1961 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
1962 vect_memory_access_type *memory_access_type,
1963 gather_scatter_info *gs_info)
1964{
1965 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1966 vec_info *vinfo = stmt_info->vinfo;
1967 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 1968 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1969 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1970 {
1971 *memory_access_type = VMAT_GATHER_SCATTER;
1972 gimple *def_stmt;
1973 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1974 gcc_unreachable ();
1975 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1976 &gs_info->offset_dt,
1977 &gs_info->offset_vectype))
1978 {
1979 if (dump_enabled_p ())
1980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1981 "%s index use not simple.\n",
1982 vls_type == VLS_LOAD ? "gather" : "scatter");
1983 return false;
1984 }
1985 }
1986 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1987 {
1988 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1989 memory_access_type))
1990 return false;
1991 }
1992 else if (STMT_VINFO_STRIDED_P (stmt_info))
1993 {
1994 gcc_assert (!slp);
1995 *memory_access_type = VMAT_ELEMENTWISE;
1996 }
1997 else
62da9e14
RS
1998 {
1999 int cmp = compare_step_with_zero (stmt);
2000 if (cmp < 0)
2001 *memory_access_type = get_negative_load_store_type
2002 (stmt, vectype, vls_type, ncopies);
2003 else if (cmp == 0)
2004 {
2005 gcc_assert (vls_type == VLS_LOAD);
2006 *memory_access_type = VMAT_INVARIANT;
2007 }
2008 else
2009 *memory_access_type = VMAT_CONTIGUOUS;
2010 }
2de001ee 2011
4d694b27
RS
2012 if ((*memory_access_type == VMAT_ELEMENTWISE
2013 || *memory_access_type == VMAT_STRIDED_SLP)
2014 && !nunits.is_constant ())
2015 {
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2018 "Not using elementwise accesses due to variable "
2019 "vectorization factor.\n");
2020 return false;
2021 }
2022
2de001ee
RS
2023 /* FIXME: At the moment the cost model seems to underestimate the
2024 cost of using elementwise accesses. This check preserves the
2025 traditional behavior until that can be fixed. */
2026 if (*memory_access_type == VMAT_ELEMENTWISE
2027 && !STMT_VINFO_STRIDED_P (stmt_info))
2028 {
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2031 "not falling back to elementwise accesses\n");
2032 return false;
2033 }
2034 return true;
2035}
2036
5ce9450f
JJ
2037/* Function vectorizable_mask_load_store.
2038
2039 Check if STMT performs a conditional load or store that can be vectorized.
2040 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2041 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2042 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2043
2044static bool
355fe088
TS
2045vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2046 gimple **vec_stmt, slp_tree slp_node)
5ce9450f
JJ
2047{
2048 tree vec_dest = NULL;
2049 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2050 stmt_vec_info prev_stmt_info;
2051 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2052 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2053 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2054 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2055 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
57e2f6ad 2056 tree rhs_vectype = NULL_TREE;
045c1278 2057 tree mask_vectype;
5ce9450f 2058 tree elem_type;
355fe088 2059 gimple *new_stmt;
5ce9450f
JJ
2060 tree dummy;
2061 tree dataref_ptr = NULL_TREE;
355fe088 2062 gimple *ptr_incr;
4d694b27 2063 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5ce9450f
JJ
2064 int ncopies;
2065 int i, j;
2066 bool inv_p;
134c85ca 2067 gather_scatter_info gs_info;
2de001ee 2068 vec_load_store_type vls_type;
5ce9450f 2069 tree mask;
355fe088 2070 gimple *def_stmt;
5ce9450f
JJ
2071 enum vect_def_type dt;
2072
2073 if (slp_node != NULL)
2074 return false;
2075
e8f142e2 2076 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5ce9450f
JJ
2077 gcc_assert (ncopies >= 1);
2078
5ce9450f 2079 mask = gimple_call_arg (stmt, 2);
045c1278 2080
2568d8a1 2081 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
5ce9450f
JJ
2082 return false;
2083
2084 /* FORNOW. This restriction should be relaxed. */
2085 if (nested_in_vect_loop && ncopies > 1)
2086 {
2087 if (dump_enabled_p ())
2088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2089 "multiple types in nested loop.");
2090 return false;
2091 }
2092
2093 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2094 return false;
2095
66c16fd9
RB
2096 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2097 && ! vec_stmt)
5ce9450f
JJ
2098 return false;
2099
2100 if (!STMT_VINFO_DATA_REF (stmt_info))
2101 return false;
2102
2103 elem_type = TREE_TYPE (vectype);
2104
045c1278
IE
2105 if (TREE_CODE (mask) != SSA_NAME)
2106 return false;
2107
2108 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2109 return false;
2110
2111 if (!mask_vectype)
2112 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2113
dc6a3147
IE
2114 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2115 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
045c1278
IE
2116 return false;
2117
2de001ee 2118 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
57e2f6ad
IE
2119 {
2120 tree rhs = gimple_call_arg (stmt, 3);
2121 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2122 return false;
2de001ee
RS
2123 if (dt == vect_constant_def || dt == vect_external_def)
2124 vls_type = VLS_STORE_INVARIANT;
2125 else
2126 vls_type = VLS_STORE;
57e2f6ad 2127 }
2de001ee
RS
2128 else
2129 vls_type = VLS_LOAD;
57e2f6ad 2130
2de001ee 2131 vect_memory_access_type memory_access_type;
62da9e14 2132 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2de001ee
RS
2133 &memory_access_type, &gs_info))
2134 return false;
03b9e8e4 2135
2de001ee
RS
2136 if (memory_access_type == VMAT_GATHER_SCATTER)
2137 {
134c85ca 2138 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
03b9e8e4
JJ
2139 tree masktype
2140 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2141 if (TREE_CODE (masktype) == INTEGER_TYPE)
2142 {
2143 if (dump_enabled_p ())
2144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2145 "masked gather with integer mask not supported.");
2146 return false;
2147 }
5ce9450f 2148 }
2de001ee
RS
2149 else if (memory_access_type != VMAT_CONTIGUOUS)
2150 {
2151 if (dump_enabled_p ())
2152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2153 "unsupported access type for masked %s.\n",
2154 vls_type == VLS_LOAD ? "load" : "store");
2155 return false;
2156 }
5ce9450f 2157 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
045c1278
IE
2158 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2159 TYPE_MODE (mask_vectype),
2de001ee 2160 vls_type == VLS_LOAD)
57e2f6ad
IE
2161 || (rhs_vectype
2162 && !useless_type_conversion_p (vectype, rhs_vectype)))
5ce9450f
JJ
2163 return false;
2164
5ce9450f
JJ
2165 if (!vec_stmt) /* transformation not required. */
2166 {
2de001ee 2167 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5ce9450f 2168 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2de001ee
RS
2169 if (vls_type == VLS_LOAD)
2170 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2171 NULL, NULL, NULL);
5ce9450f 2172 else
2de001ee
RS
2173 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2174 dt, NULL, NULL, NULL);
5ce9450f
JJ
2175 return true;
2176 }
2de001ee 2177 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5ce9450f 2178
67b8dbac 2179 /* Transform. */
5ce9450f 2180
2de001ee 2181 if (memory_access_type == VMAT_GATHER_SCATTER)
5ce9450f
JJ
2182 {
2183 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 2184 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5ce9450f 2185 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 2186 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 2187 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 2188 tree mask_perm_mask = NULL_TREE;
5ce9450f
JJ
2189 edge pe = loop_preheader_edge (loop);
2190 gimple_seq seq;
2191 basic_block new_bb;
2192 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
2193 poly_uint64 gather_off_nunits
2194 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5ce9450f 2195
134c85ca 2196 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
acdcd61b
JJ
2197 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2198 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2199 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2200 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2201 scaletype = TREE_VALUE (arglist);
2202 gcc_checking_assert (types_compatible_p (srctype, rettype)
2203 && types_compatible_p (srctype, masktype));
2204
4d694b27 2205 if (known_eq (nunits, gather_off_nunits))
5ce9450f 2206 modifier = NONE;
4d694b27 2207 else if (known_eq (nunits * 2, gather_off_nunits))
5ce9450f 2208 {
5ce9450f
JJ
2209 modifier = WIDEN;
2210
4d694b27
RS
2211 /* Currently widening gathers and scatters are only supported for
2212 fixed-length vectors. */
2213 int count = gather_off_nunits.to_constant ();
2214 vec_perm_builder sel (count, count, 1);
2215 for (i = 0; i < count; ++i)
2216 sel.quick_push (i | (count / 2));
5ce9450f 2217
4d694b27 2218 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
2219 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2220 indices);
5ce9450f 2221 }
4d694b27 2222 else if (known_eq (nunits, gather_off_nunits * 2))
5ce9450f 2223 {
5ce9450f
JJ
2224 modifier = NARROW;
2225
4d694b27
RS
2226 /* Currently narrowing gathers and scatters are only supported for
2227 fixed-length vectors. */
2228 int count = nunits.to_constant ();
2229 vec_perm_builder sel (count, count, 1);
2230 sel.quick_grow (count);
2231 for (i = 0; i < count; ++i)
2232 sel[i] = i < count / 2 ? i : i + count / 2;
2233 vec_perm_indices indices (sel, 2, count);
e3342de4 2234 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5ce9450f 2235
5ce9450f 2236 ncopies *= 2;
4d694b27
RS
2237 for (i = 0; i < count; ++i)
2238 sel[i] = i | (count / 2);
2239 indices.new_vector (sel, 2, count);
e3342de4 2240 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
5ce9450f
JJ
2241 }
2242 else
2243 gcc_unreachable ();
2244
5ce9450f
JJ
2245 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2246
134c85ca 2247 ptr = fold_convert (ptrtype, gs_info.base);
5ce9450f
JJ
2248 if (!is_gimple_min_invariant (ptr))
2249 {
2250 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2251 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2252 gcc_assert (!new_bb);
2253 }
2254
134c85ca 2255 scale = build_int_cst (scaletype, gs_info.scale);
5ce9450f
JJ
2256
2257 prev_stmt_info = NULL;
2258 for (j = 0; j < ncopies; ++j)
2259 {
2260 if (modifier == WIDEN && (j & 1))
2261 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2262 perm_mask, stmt, gsi);
2263 else if (j == 0)
2264 op = vec_oprnd0
134c85ca 2265 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5ce9450f
JJ
2266 else
2267 op = vec_oprnd0
134c85ca 2268 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
5ce9450f
JJ
2269
2270 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2271 {
2272 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2273 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 2274 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5ce9450f
JJ
2275 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2276 new_stmt
0d0e4a03 2277 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2278 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2279 op = var;
2280 }
2281
acdcd61b
JJ
2282 if (mask_perm_mask && (j & 1))
2283 mask_op = permute_vec_elements (mask_op, mask_op,
2284 mask_perm_mask, stmt, gsi);
5ce9450f
JJ
2285 else
2286 {
acdcd61b 2287 if (j == 0)
81c40241 2288 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
acdcd61b
JJ
2289 else
2290 {
81c40241 2291 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
acdcd61b
JJ
2292 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2293 }
5ce9450f 2294
acdcd61b
JJ
2295 mask_op = vec_mask;
2296 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2297 {
2298 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2299 == TYPE_VECTOR_SUBPARTS (masktype));
0e22bb5a 2300 var = vect_get_new_ssa_name (masktype, vect_simple_var);
acdcd61b
JJ
2301 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2302 new_stmt
0d0e4a03 2303 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
acdcd61b
JJ
2304 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2305 mask_op = var;
2306 }
5ce9450f
JJ
2307 }
2308
2309 new_stmt
134c85ca 2310 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
5ce9450f
JJ
2311 scale);
2312
2313 if (!useless_type_conversion_p (vectype, rettype))
2314 {
2315 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2316 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 2317 op = vect_get_new_ssa_name (rettype, vect_simple_var);
5ce9450f
JJ
2318 gimple_call_set_lhs (new_stmt, op);
2319 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 2320 var = make_ssa_name (vec_dest);
5ce9450f 2321 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
0d0e4a03 2322 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2323 }
2324 else
2325 {
2326 var = make_ssa_name (vec_dest, new_stmt);
2327 gimple_call_set_lhs (new_stmt, var);
2328 }
2329
2330 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2331
2332 if (modifier == NARROW)
2333 {
2334 if ((j & 1) == 0)
2335 {
2336 prev_res = var;
2337 continue;
2338 }
2339 var = permute_vec_elements (prev_res, var,
2340 perm_mask, stmt, gsi);
2341 new_stmt = SSA_NAME_DEF_STMT (var);
2342 }
2343
2344 if (prev_stmt_info == NULL)
2345 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2346 else
2347 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2348 prev_stmt_info = vinfo_for_stmt (new_stmt);
2349 }
3efe2e2c
JJ
2350
2351 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2352 from the IL. */
e6f5c25d
IE
2353 if (STMT_VINFO_RELATED_STMT (stmt_info))
2354 {
2355 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2356 stmt_info = vinfo_for_stmt (stmt);
2357 }
3efe2e2c
JJ
2358 tree lhs = gimple_call_lhs (stmt);
2359 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2360 set_vinfo_for_stmt (new_stmt, stmt_info);
2361 set_vinfo_for_stmt (stmt, NULL);
2362 STMT_VINFO_STMT (stmt_info) = new_stmt;
2363 gsi_replace (gsi, new_stmt, true);
5ce9450f
JJ
2364 return true;
2365 }
2de001ee 2366 else if (vls_type != VLS_LOAD)
5ce9450f
JJ
2367 {
2368 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2369 prev_stmt_info = NULL;
2d4dc223 2370 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
5ce9450f
JJ
2371 for (i = 0; i < ncopies; i++)
2372 {
2373 unsigned align, misalign;
2374
2375 if (i == 0)
2376 {
2377 tree rhs = gimple_call_arg (stmt, 3);
81c40241 2378 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
7251b0bf
RS
2379 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2380 mask_vectype);
5ce9450f
JJ
2381 /* We should have catched mismatched types earlier. */
2382 gcc_assert (useless_type_conversion_p (vectype,
2383 TREE_TYPE (vec_rhs)));
2384 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2385 NULL_TREE, &dummy, gsi,
2386 &ptr_incr, false, &inv_p);
2387 gcc_assert (!inv_p);
2388 }
2389 else
2390 {
81c40241 2391 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
5ce9450f 2392 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
81c40241 2393 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2394 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2395 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2396 TYPE_SIZE_UNIT (vectype));
2397 }
2398
f702e7d4 2399 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2400 if (aligned_access_p (dr))
2401 misalign = 0;
2402 else if (DR_MISALIGNMENT (dr) == -1)
2403 {
2404 align = TYPE_ALIGN_UNIT (elem_type);
2405 misalign = 0;
2406 }
2407 else
2408 misalign = DR_MISALIGNMENT (dr);
2409 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2410 misalign);
08554c26 2411 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2412 misalign ? least_bit_hwi (misalign) : align);
a844293d 2413 gcall *call
5ce9450f 2414 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
08554c26 2415 ptr, vec_mask, vec_rhs);
a844293d
RS
2416 gimple_call_set_nothrow (call, true);
2417 new_stmt = call;
5ce9450f
JJ
2418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2419 if (i == 0)
2420 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2421 else
2422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2423 prev_stmt_info = vinfo_for_stmt (new_stmt);
2424 }
2425 }
2426 else
2427 {
2428 tree vec_mask = NULL_TREE;
2429 prev_stmt_info = NULL;
2430 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2431 for (i = 0; i < ncopies; i++)
2432 {
2433 unsigned align, misalign;
2434
2435 if (i == 0)
2436 {
7251b0bf
RS
2437 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2438 mask_vectype);
5ce9450f
JJ
2439 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2440 NULL_TREE, &dummy, gsi,
2441 &ptr_incr, false, &inv_p);
2442 gcc_assert (!inv_p);
2443 }
2444 else
2445 {
81c40241 2446 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2447 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2448 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2449 TYPE_SIZE_UNIT (vectype));
2450 }
2451
f702e7d4 2452 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2453 if (aligned_access_p (dr))
2454 misalign = 0;
2455 else if (DR_MISALIGNMENT (dr) == -1)
2456 {
2457 align = TYPE_ALIGN_UNIT (elem_type);
2458 misalign = 0;
2459 }
2460 else
2461 misalign = DR_MISALIGNMENT (dr);
2462 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2463 misalign);
08554c26 2464 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2465 misalign ? least_bit_hwi (misalign) : align);
a844293d 2466 gcall *call
5ce9450f 2467 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
08554c26 2468 ptr, vec_mask);
a844293d
RS
2469 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2470 gimple_call_set_nothrow (call, true);
2471 vect_finish_stmt_generation (stmt, call, gsi);
5ce9450f 2472 if (i == 0)
a844293d 2473 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
5ce9450f 2474 else
a844293d
RS
2475 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2476 prev_stmt_info = vinfo_for_stmt (call);
5ce9450f
JJ
2477 }
2478 }
2479
2de001ee 2480 if (vls_type == VLS_LOAD)
3efe2e2c
JJ
2481 {
2482 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2483 from the IL. */
e6f5c25d
IE
2484 if (STMT_VINFO_RELATED_STMT (stmt_info))
2485 {
2486 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2487 stmt_info = vinfo_for_stmt (stmt);
2488 }
3efe2e2c
JJ
2489 tree lhs = gimple_call_lhs (stmt);
2490 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2491 set_vinfo_for_stmt (new_stmt, stmt_info);
2492 set_vinfo_for_stmt (stmt, NULL);
2493 STMT_VINFO_STMT (stmt_info) = new_stmt;
2494 gsi_replace (gsi, new_stmt, true);
2495 }
2496
5ce9450f
JJ
2497 return true;
2498}
2499
37b14185
RB
2500/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2501
2502static bool
2503vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2504 gimple **vec_stmt, slp_tree slp_node,
2505 tree vectype_in, enum vect_def_type *dt)
2506{
2507 tree op, vectype;
2508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2509 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2510 unsigned ncopies, nunits;
2511
2512 op = gimple_call_arg (stmt, 0);
2513 vectype = STMT_VINFO_VECTYPE (stmt_info);
2514 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2515
2516 /* Multiple types in SLP are handled by creating the appropriate number of
2517 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2518 case of SLP. */
2519 if (slp_node)
2520 ncopies = 1;
2521 else
e8f142e2 2522 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2523
2524 gcc_assert (ncopies >= 1);
2525
2526 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2527 if (! char_vectype)
2528 return false;
2529
794e3180 2530 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
794e3180 2531 unsigned word_bytes = num_bytes / nunits;
908a1a16 2532
d980067b
RS
2533 /* The encoding uses one stepped pattern for each byte in the word. */
2534 vec_perm_builder elts (num_bytes, word_bytes, 3);
2535 for (unsigned i = 0; i < 3; ++i)
37b14185 2536 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2537 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2538
e3342de4
RS
2539 vec_perm_indices indices (elts, 1, num_bytes);
2540 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2541 return false;
2542
2543 if (! vec_stmt)
2544 {
2545 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2548 "\n");
2549 if (! PURE_SLP_STMT (stmt_info))
2550 {
2551 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2552 1, vector_stmt, stmt_info, 0, vect_prologue);
2553 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2554 ncopies, vec_perm, stmt_info, 0, vect_body);
2555 }
2556 return true;
2557 }
2558
736d0f28 2559 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
2560
2561 /* Transform. */
2562 vec<tree> vec_oprnds = vNULL;
2563 gimple *new_stmt = NULL;
2564 stmt_vec_info prev_stmt_info = NULL;
2565 for (unsigned j = 0; j < ncopies; j++)
2566 {
2567 /* Handle uses. */
2568 if (j == 0)
306b0c92 2569 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2570 else
2571 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2572
2573 /* Arguments are ready. create the new vector stmt. */
2574 unsigned i;
2575 tree vop;
2576 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2577 {
2578 tree tem = make_ssa_name (char_vectype);
2579 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2580 char_vectype, vop));
2581 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2582 tree tem2 = make_ssa_name (char_vectype);
2583 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2584 tem, tem, bswap_vconst);
2585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2586 tem = make_ssa_name (vectype);
2587 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2588 vectype, tem2));
2589 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2590 if (slp_node)
2591 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2592 }
2593
2594 if (slp_node)
2595 continue;
2596
2597 if (j == 0)
2598 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2599 else
2600 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2601
2602 prev_stmt_info = vinfo_for_stmt (new_stmt);
2603 }
2604
2605 vec_oprnds.release ();
2606 return true;
2607}
2608
b1b6836e
RS
2609/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2610 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2611 in a single step. On success, store the binary pack code in
2612 *CONVERT_CODE. */
2613
2614static bool
2615simple_integer_narrowing (tree vectype_out, tree vectype_in,
2616 tree_code *convert_code)
2617{
2618 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2619 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2620 return false;
2621
2622 tree_code code;
2623 int multi_step_cvt = 0;
2624 auto_vec <tree, 8> interm_types;
2625 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2626 &code, &multi_step_cvt,
2627 &interm_types)
2628 || multi_step_cvt)
2629 return false;
2630
2631 *convert_code = code;
2632 return true;
2633}
5ce9450f 2634
ebfd146a
IR
2635/* Function vectorizable_call.
2636
538dd0b7 2637 Check if GS performs a function call that can be vectorized.
b8698a0f 2638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2641
2642static bool
355fe088 2643vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2644 slp_tree slp_node)
ebfd146a 2645{
538dd0b7 2646 gcall *stmt;
ebfd146a
IR
2647 tree vec_dest;
2648 tree scalar_dest;
2649 tree op, type;
2650 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2651 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 2652 tree vectype_out, vectype_in;
c7bda0f4
RS
2653 poly_uint64 nunits_in;
2654 poly_uint64 nunits_out;
ebfd146a 2655 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2656 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 2657 vec_info *vinfo = stmt_info->vinfo;
81c40241 2658 tree fndecl, new_temp, rhs_type;
355fe088 2659 gimple *def_stmt;
0502fb85
UB
2660 enum vect_def_type dt[3]
2661 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 2662 int ndts = 3;
355fe088 2663 gimple *new_stmt = NULL;
ebfd146a 2664 int ncopies, j;
6e1aa848 2665 vec<tree> vargs = vNULL;
ebfd146a
IR
2666 enum { NARROW, NONE, WIDEN } modifier;
2667 size_t i, nargs;
9d5e7640 2668 tree lhs;
ebfd146a 2669
190c2236 2670 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2671 return false;
2672
66c16fd9
RB
2673 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2674 && ! vec_stmt)
ebfd146a
IR
2675 return false;
2676
538dd0b7
DM
2677 /* Is GS a vectorizable call? */
2678 stmt = dyn_cast <gcall *> (gs);
2679 if (!stmt)
ebfd146a
IR
2680 return false;
2681
5ce9450f
JJ
2682 if (gimple_call_internal_p (stmt)
2683 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2684 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2685 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2686 slp_node);
2687
0136f8f0
AH
2688 if (gimple_call_lhs (stmt) == NULL_TREE
2689 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2690 return false;
2691
0136f8f0 2692 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2693
b690cc0f
RG
2694 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2695
ebfd146a
IR
2696 /* Process function arguments. */
2697 rhs_type = NULL_TREE;
b690cc0f 2698 vectype_in = NULL_TREE;
ebfd146a
IR
2699 nargs = gimple_call_num_args (stmt);
2700
1b1562a5
MM
2701 /* Bail out if the function has more than three arguments, we do not have
2702 interesting builtin functions to vectorize with more than two arguments
2703 except for fma. No arguments is also not good. */
2704 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2705 return false;
2706
74bf76ed
JJ
2707 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2708 if (gimple_call_internal_p (stmt)
2709 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2710 {
2711 nargs = 0;
2712 rhs_type = unsigned_type_node;
2713 }
2714
ebfd146a
IR
2715 for (i = 0; i < nargs; i++)
2716 {
b690cc0f
RG
2717 tree opvectype;
2718
ebfd146a
IR
2719 op = gimple_call_arg (stmt, i);
2720
2721 /* We can only handle calls with arguments of the same type. */
2722 if (rhs_type
8533c9d8 2723 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2724 {
73fbfcad 2725 if (dump_enabled_p ())
78c60e3d 2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2727 "argument types differ.\n");
ebfd146a
IR
2728 return false;
2729 }
b690cc0f
RG
2730 if (!rhs_type)
2731 rhs_type = TREE_TYPE (op);
ebfd146a 2732
81c40241 2733 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 2734 {
73fbfcad 2735 if (dump_enabled_p ())
78c60e3d 2736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2737 "use not simple.\n");
ebfd146a
IR
2738 return false;
2739 }
ebfd146a 2740
b690cc0f
RG
2741 if (!vectype_in)
2742 vectype_in = opvectype;
2743 else if (opvectype
2744 && opvectype != vectype_in)
2745 {
73fbfcad 2746 if (dump_enabled_p ())
78c60e3d 2747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2748 "argument vector types differ.\n");
b690cc0f
RG
2749 return false;
2750 }
2751 }
2752 /* If all arguments are external or constant defs use a vector type with
2753 the same size as the output vector type. */
ebfd146a 2754 if (!vectype_in)
b690cc0f 2755 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2756 if (vec_stmt)
2757 gcc_assert (vectype_in);
2758 if (!vectype_in)
2759 {
73fbfcad 2760 if (dump_enabled_p ())
7d8930a0 2761 {
78c60e3d
SS
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 "no vectype for scalar type ");
2764 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2765 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2766 }
2767
2768 return false;
2769 }
ebfd146a
IR
2770
2771 /* FORNOW */
b690cc0f
RG
2772 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2773 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 2774 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 2775 modifier = NARROW;
c7bda0f4 2776 else if (known_eq (nunits_out, nunits_in))
ebfd146a 2777 modifier = NONE;
c7bda0f4 2778 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
2779 modifier = WIDEN;
2780 else
2781 return false;
2782
70439f0d
RS
2783 /* We only handle functions that do not read or clobber memory. */
2784 if (gimple_vuse (stmt))
2785 {
2786 if (dump_enabled_p ())
2787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2788 "function reads from or writes to memory.\n");
2789 return false;
2790 }
2791
ebfd146a
IR
2792 /* For now, we only vectorize functions if a target specific builtin
2793 is available. TODO -- in some cases, it might be profitable to
2794 insert the calls for pieces of the vector, in order to be able
2795 to vectorize other operations in the loop. */
70439f0d
RS
2796 fndecl = NULL_TREE;
2797 internal_fn ifn = IFN_LAST;
2798 combined_fn cfn = gimple_call_combined_fn (stmt);
2799 tree callee = gimple_call_fndecl (stmt);
2800
2801 /* First try using an internal function. */
b1b6836e
RS
2802 tree_code convert_code = ERROR_MARK;
2803 if (cfn != CFN_LAST
2804 && (modifier == NONE
2805 || (modifier == NARROW
2806 && simple_integer_narrowing (vectype_out, vectype_in,
2807 &convert_code))))
70439f0d
RS
2808 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2809 vectype_in);
2810
2811 /* If that fails, try asking for a target-specific built-in function. */
2812 if (ifn == IFN_LAST)
2813 {
2814 if (cfn != CFN_LAST)
2815 fndecl = targetm.vectorize.builtin_vectorized_function
2816 (cfn, vectype_out, vectype_in);
2817 else
2818 fndecl = targetm.vectorize.builtin_md_vectorized_function
2819 (callee, vectype_out, vectype_in);
2820 }
2821
2822 if (ifn == IFN_LAST && !fndecl)
ebfd146a 2823 {
70439f0d 2824 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
2825 && !slp_node
2826 && loop_vinfo
2827 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2828 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2829 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2830 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2831 {
2832 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2833 { 0, 1, 2, ... vf - 1 } vector. */
2834 gcc_assert (nargs == 0);
2835 }
37b14185
RB
2836 else if (modifier == NONE
2837 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2838 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2839 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2840 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2841 vectype_in, dt);
74bf76ed
JJ
2842 else
2843 {
2844 if (dump_enabled_p ())
2845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2846 "function is not vectorizable.\n");
74bf76ed
JJ
2847 return false;
2848 }
ebfd146a
IR
2849 }
2850
fce57248 2851 if (slp_node)
190c2236 2852 ncopies = 1;
b1b6836e 2853 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 2854 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 2855 else
e8f142e2 2856 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
2857
2858 /* Sanity check: make sure that at least one copy of the vectorized stmt
2859 needs to be generated. */
2860 gcc_assert (ncopies >= 1);
2861
2862 if (!vec_stmt) /* transformation not required. */
2863 {
2864 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2865 if (dump_enabled_p ())
e645e942
TJ
2866 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2867 "\n");
4fc5ebf1 2868 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
b1b6836e
RS
2869 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2870 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2871 vec_promote_demote, stmt_info, 0, vect_body);
2872
ebfd146a
IR
2873 return true;
2874 }
2875
67b8dbac 2876 /* Transform. */
ebfd146a 2877
73fbfcad 2878 if (dump_enabled_p ())
e645e942 2879 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2880
2881 /* Handle def. */
2882 scalar_dest = gimple_call_lhs (stmt);
2883 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2884
2885 prev_stmt_info = NULL;
b1b6836e 2886 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 2887 {
b1b6836e 2888 tree prev_res = NULL_TREE;
ebfd146a
IR
2889 for (j = 0; j < ncopies; ++j)
2890 {
2891 /* Build argument list for the vectorized call. */
2892 if (j == 0)
9771b263 2893 vargs.create (nargs);
ebfd146a 2894 else
9771b263 2895 vargs.truncate (0);
ebfd146a 2896
190c2236
JJ
2897 if (slp_node)
2898 {
ef062b13 2899 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2900 vec<tree> vec_oprnds0;
190c2236
JJ
2901
2902 for (i = 0; i < nargs; i++)
9771b263 2903 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2904 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2905 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2906
2907 /* Arguments are ready. Create the new vector stmt. */
9771b263 2908 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2909 {
2910 size_t k;
2911 for (k = 0; k < nargs; k++)
2912 {
37b5ec8f 2913 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2914 vargs[k] = vec_oprndsk[i];
190c2236 2915 }
b1b6836e
RS
2916 if (modifier == NARROW)
2917 {
2918 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2919 gcall *call
2920 = gimple_build_call_internal_vec (ifn, vargs);
2921 gimple_call_set_lhs (call, half_res);
2922 gimple_call_set_nothrow (call, true);
2923 new_stmt = call;
b1b6836e
RS
2924 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2925 if ((i & 1) == 0)
2926 {
2927 prev_res = half_res;
2928 continue;
2929 }
2930 new_temp = make_ssa_name (vec_dest);
2931 new_stmt = gimple_build_assign (new_temp, convert_code,
2932 prev_res, half_res);
2933 }
70439f0d 2934 else
b1b6836e 2935 {
a844293d 2936 gcall *call;
b1b6836e 2937 if (ifn != IFN_LAST)
a844293d 2938 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 2939 else
a844293d
RS
2940 call = gimple_build_call_vec (fndecl, vargs);
2941 new_temp = make_ssa_name (vec_dest, call);
2942 gimple_call_set_lhs (call, new_temp);
2943 gimple_call_set_nothrow (call, true);
2944 new_stmt = call;
b1b6836e 2945 }
190c2236 2946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2947 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2948 }
2949
2950 for (i = 0; i < nargs; i++)
2951 {
37b5ec8f 2952 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2953 vec_oprndsi.release ();
190c2236 2954 }
190c2236
JJ
2955 continue;
2956 }
2957
ebfd146a
IR
2958 for (i = 0; i < nargs; i++)
2959 {
2960 op = gimple_call_arg (stmt, i);
2961 if (j == 0)
2962 vec_oprnd0
81c40241 2963 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 2964 else
63827fb8
IR
2965 {
2966 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2967 vec_oprnd0
2968 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2969 }
ebfd146a 2970
9771b263 2971 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2972 }
2973
74bf76ed
JJ
2974 if (gimple_call_internal_p (stmt)
2975 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2976 {
c7bda0f4 2977 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 2978 tree new_var
0e22bb5a 2979 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 2980 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 2981 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2982 new_temp = make_ssa_name (vec_dest);
0e22bb5a 2983 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 2984 }
b1b6836e
RS
2985 else if (modifier == NARROW)
2986 {
2987 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2988 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2989 gimple_call_set_lhs (call, half_res);
2990 gimple_call_set_nothrow (call, true);
2991 new_stmt = call;
b1b6836e
RS
2992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2993 if ((j & 1) == 0)
2994 {
2995 prev_res = half_res;
2996 continue;
2997 }
2998 new_temp = make_ssa_name (vec_dest);
2999 new_stmt = gimple_build_assign (new_temp, convert_code,
3000 prev_res, half_res);
3001 }
74bf76ed
JJ
3002 else
3003 {
a844293d 3004 gcall *call;
70439f0d 3005 if (ifn != IFN_LAST)
a844293d 3006 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3007 else
a844293d 3008 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3009 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3010 gimple_call_set_lhs (call, new_temp);
3011 gimple_call_set_nothrow (call, true);
3012 new_stmt = call;
74bf76ed 3013 }
ebfd146a
IR
3014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3015
b1b6836e 3016 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3017 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3018 else
3019 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3020
3021 prev_stmt_info = vinfo_for_stmt (new_stmt);
3022 }
b1b6836e
RS
3023 }
3024 else if (modifier == NARROW)
3025 {
ebfd146a
IR
3026 for (j = 0; j < ncopies; ++j)
3027 {
3028 /* Build argument list for the vectorized call. */
3029 if (j == 0)
9771b263 3030 vargs.create (nargs * 2);
ebfd146a 3031 else
9771b263 3032 vargs.truncate (0);
ebfd146a 3033
190c2236
JJ
3034 if (slp_node)
3035 {
ef062b13 3036 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3037 vec<tree> vec_oprnds0;
190c2236
JJ
3038
3039 for (i = 0; i < nargs; i++)
9771b263 3040 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3041 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3042 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3043
3044 /* Arguments are ready. Create the new vector stmt. */
9771b263 3045 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3046 {
3047 size_t k;
9771b263 3048 vargs.truncate (0);
190c2236
JJ
3049 for (k = 0; k < nargs; k++)
3050 {
37b5ec8f 3051 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3052 vargs.quick_push (vec_oprndsk[i]);
3053 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3054 }
a844293d 3055 gcall *call;
70439f0d 3056 if (ifn != IFN_LAST)
a844293d 3057 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3058 else
a844293d
RS
3059 call = gimple_build_call_vec (fndecl, vargs);
3060 new_temp = make_ssa_name (vec_dest, call);
3061 gimple_call_set_lhs (call, new_temp);
3062 gimple_call_set_nothrow (call, true);
3063 new_stmt = call;
190c2236 3064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3065 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3066 }
3067
3068 for (i = 0; i < nargs; i++)
3069 {
37b5ec8f 3070 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3071 vec_oprndsi.release ();
190c2236 3072 }
190c2236
JJ
3073 continue;
3074 }
3075
ebfd146a
IR
3076 for (i = 0; i < nargs; i++)
3077 {
3078 op = gimple_call_arg (stmt, i);
3079 if (j == 0)
3080 {
3081 vec_oprnd0
81c40241 3082 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3083 vec_oprnd1
63827fb8 3084 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3085 }
3086 else
3087 {
336ecb65 3088 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3089 vec_oprnd0
63827fb8 3090 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3091 vec_oprnd1
63827fb8 3092 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3093 }
3094
9771b263
DN
3095 vargs.quick_push (vec_oprnd0);
3096 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3097 }
3098
b1b6836e 3099 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3100 new_temp = make_ssa_name (vec_dest, new_stmt);
3101 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3102 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3103
3104 if (j == 0)
3105 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3106 else
3107 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3108
3109 prev_stmt_info = vinfo_for_stmt (new_stmt);
3110 }
3111
3112 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3113 }
b1b6836e
RS
3114 else
3115 /* No current target implements this case. */
3116 return false;
ebfd146a 3117
9771b263 3118 vargs.release ();
ebfd146a 3119
ebfd146a
IR
3120 /* The call in STMT might prevent it from being removed in dce.
3121 We however cannot remove it here, due to the way the ssa name
3122 it defines is mapped to the new definition. So just replace
3123 rhs of the statement with something harmless. */
3124
dd34c087
JJ
3125 if (slp_node)
3126 return true;
3127
ebfd146a 3128 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3129 if (is_pattern_stmt_p (stmt_info))
3130 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3131 else
3132 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3133
9d5e7640 3134 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3135 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3136 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3137 STMT_VINFO_STMT (stmt_info) = new_stmt;
3138 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3139
3140 return true;
3141}
3142
3143
0136f8f0
AH
3144struct simd_call_arg_info
3145{
3146 tree vectype;
3147 tree op;
0136f8f0 3148 HOST_WIDE_INT linear_step;
34e82342 3149 enum vect_def_type dt;
0136f8f0 3150 unsigned int align;
17b658af 3151 bool simd_lane_linear;
0136f8f0
AH
3152};
3153
17b658af
JJ
3154/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3155 is linear within simd lane (but not within whole loop), note it in
3156 *ARGINFO. */
3157
3158static void
3159vect_simd_lane_linear (tree op, struct loop *loop,
3160 struct simd_call_arg_info *arginfo)
3161{
355fe088 3162 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3163
3164 if (!is_gimple_assign (def_stmt)
3165 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3166 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3167 return;
3168
3169 tree base = gimple_assign_rhs1 (def_stmt);
3170 HOST_WIDE_INT linear_step = 0;
3171 tree v = gimple_assign_rhs2 (def_stmt);
3172 while (TREE_CODE (v) == SSA_NAME)
3173 {
3174 tree t;
3175 def_stmt = SSA_NAME_DEF_STMT (v);
3176 if (is_gimple_assign (def_stmt))
3177 switch (gimple_assign_rhs_code (def_stmt))
3178 {
3179 case PLUS_EXPR:
3180 t = gimple_assign_rhs2 (def_stmt);
3181 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3182 return;
3183 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3184 v = gimple_assign_rhs1 (def_stmt);
3185 continue;
3186 case MULT_EXPR:
3187 t = gimple_assign_rhs2 (def_stmt);
3188 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3189 return;
3190 linear_step = tree_to_shwi (t);
3191 v = gimple_assign_rhs1 (def_stmt);
3192 continue;
3193 CASE_CONVERT:
3194 t = gimple_assign_rhs1 (def_stmt);
3195 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3196 || (TYPE_PRECISION (TREE_TYPE (v))
3197 < TYPE_PRECISION (TREE_TYPE (t))))
3198 return;
3199 if (!linear_step)
3200 linear_step = 1;
3201 v = t;
3202 continue;
3203 default:
3204 return;
3205 }
8e4284d0 3206 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3207 && loop->simduid
3208 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3209 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3210 == loop->simduid))
3211 {
3212 if (!linear_step)
3213 linear_step = 1;
3214 arginfo->linear_step = linear_step;
3215 arginfo->op = base;
3216 arginfo->simd_lane_linear = true;
3217 return;
3218 }
3219 }
3220}
3221
0136f8f0
AH
3222/* Function vectorizable_simd_clone_call.
3223
3224 Check if STMT performs a function call that can be vectorized
3225 by calling a simd clone of the function.
3226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3227 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3228 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3229
3230static bool
355fe088
TS
3231vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3232 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3233{
3234 tree vec_dest;
3235 tree scalar_dest;
3236 tree op, type;
3237 tree vec_oprnd0 = NULL_TREE;
3238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3239 tree vectype;
3240 unsigned int nunits;
3241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3242 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3243 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3244 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3245 tree fndecl, new_temp;
355fe088
TS
3246 gimple *def_stmt;
3247 gimple *new_stmt = NULL;
0136f8f0 3248 int ncopies, j;
00426f9a 3249 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3250 vec<tree> vargs = vNULL;
3251 size_t i, nargs;
3252 tree lhs, rtype, ratype;
e7a74006 3253 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3254
3255 /* Is STMT a vectorizable call? */
3256 if (!is_gimple_call (stmt))
3257 return false;
3258
3259 fndecl = gimple_call_fndecl (stmt);
3260 if (fndecl == NULL_TREE)
3261 return false;
3262
d52f5295 3263 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3264 if (node == NULL || node->simd_clones == NULL)
3265 return false;
3266
3267 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3268 return false;
3269
66c16fd9
RB
3270 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3271 && ! vec_stmt)
0136f8f0
AH
3272 return false;
3273
3274 if (gimple_call_lhs (stmt)
3275 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3276 return false;
3277
3278 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3279
3280 vectype = STMT_VINFO_VECTYPE (stmt_info);
3281
3282 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3283 return false;
3284
3285 /* FORNOW */
fce57248 3286 if (slp_node)
0136f8f0
AH
3287 return false;
3288
3289 /* Process function arguments. */
3290 nargs = gimple_call_num_args (stmt);
3291
3292 /* Bail out if the function has zero arguments. */
3293 if (nargs == 0)
3294 return false;
3295
00426f9a 3296 arginfo.reserve (nargs, true);
0136f8f0
AH
3297
3298 for (i = 0; i < nargs; i++)
3299 {
3300 simd_call_arg_info thisarginfo;
3301 affine_iv iv;
3302
3303 thisarginfo.linear_step = 0;
3304 thisarginfo.align = 0;
3305 thisarginfo.op = NULL_TREE;
17b658af 3306 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3307
3308 op = gimple_call_arg (stmt, i);
81c40241
RB
3309 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3310 &thisarginfo.vectype)
0136f8f0
AH
3311 || thisarginfo.dt == vect_uninitialized_def)
3312 {
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3315 "use not simple.\n");
0136f8f0
AH
3316 return false;
3317 }
3318
3319 if (thisarginfo.dt == vect_constant_def
3320 || thisarginfo.dt == vect_external_def)
3321 gcc_assert (thisarginfo.vectype == NULL_TREE);
3322 else
3323 gcc_assert (thisarginfo.vectype != NULL_TREE);
3324
6c9e85fb
JJ
3325 /* For linear arguments, the analyze phase should have saved
3326 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3327 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3328 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3329 {
3330 gcc_assert (vec_stmt);
3331 thisarginfo.linear_step
17b658af 3332 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3333 thisarginfo.op
17b658af
JJ
3334 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3335 thisarginfo.simd_lane_linear
3336 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3337 == boolean_true_node);
6c9e85fb
JJ
3338 /* If loop has been peeled for alignment, we need to adjust it. */
3339 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3340 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3341 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3342 {
3343 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3344 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3345 tree opt = TREE_TYPE (thisarginfo.op);
3346 bias = fold_convert (TREE_TYPE (step), bias);
3347 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3348 thisarginfo.op
3349 = fold_build2 (POINTER_TYPE_P (opt)
3350 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3351 thisarginfo.op, bias);
3352 }
3353 }
3354 else if (!vec_stmt
3355 && thisarginfo.dt != vect_constant_def
3356 && thisarginfo.dt != vect_external_def
3357 && loop_vinfo
3358 && TREE_CODE (op) == SSA_NAME
3359 && simple_iv (loop, loop_containing_stmt (stmt), op,
3360 &iv, false)
3361 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3362 {
3363 thisarginfo.linear_step = tree_to_shwi (iv.step);
3364 thisarginfo.op = iv.base;
3365 }
3366 else if ((thisarginfo.dt == vect_constant_def
3367 || thisarginfo.dt == vect_external_def)
3368 && POINTER_TYPE_P (TREE_TYPE (op)))
3369 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3370 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3371 linear too. */
3372 if (POINTER_TYPE_P (TREE_TYPE (op))
3373 && !thisarginfo.linear_step
3374 && !vec_stmt
3375 && thisarginfo.dt != vect_constant_def
3376 && thisarginfo.dt != vect_external_def
3377 && loop_vinfo
3378 && !slp_node
3379 && TREE_CODE (op) == SSA_NAME)
3380 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3381
3382 arginfo.quick_push (thisarginfo);
3383 }
3384
d9f21f6a
RS
3385 unsigned HOST_WIDE_INT vf;
3386 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3387 {
3388 if (dump_enabled_p ())
3389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3390 "not considering SIMD clones; not yet supported"
3391 " for variable-width vectors.\n");
3392 return NULL;
3393 }
3394
0136f8f0
AH
3395 unsigned int badness = 0;
3396 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3397 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3398 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3399 else
3400 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3401 n = n->simdclone->next_clone)
3402 {
3403 unsigned int this_badness = 0;
d9f21f6a 3404 if (n->simdclone->simdlen > vf
0136f8f0
AH
3405 || n->simdclone->nargs != nargs)
3406 continue;
d9f21f6a
RS
3407 if (n->simdclone->simdlen < vf)
3408 this_badness += (exact_log2 (vf)
0136f8f0
AH
3409 - exact_log2 (n->simdclone->simdlen)) * 1024;
3410 if (n->simdclone->inbranch)
3411 this_badness += 2048;
3412 int target_badness = targetm.simd_clone.usable (n);
3413 if (target_badness < 0)
3414 continue;
3415 this_badness += target_badness * 512;
3416 /* FORNOW: Have to add code to add the mask argument. */
3417 if (n->simdclone->inbranch)
3418 continue;
3419 for (i = 0; i < nargs; i++)
3420 {
3421 switch (n->simdclone->args[i].arg_type)
3422 {
3423 case SIMD_CLONE_ARG_TYPE_VECTOR:
3424 if (!useless_type_conversion_p
3425 (n->simdclone->args[i].orig_type,
3426 TREE_TYPE (gimple_call_arg (stmt, i))))
3427 i = -1;
3428 else if (arginfo[i].dt == vect_constant_def
3429 || arginfo[i].dt == vect_external_def
3430 || arginfo[i].linear_step)
3431 this_badness += 64;
3432 break;
3433 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3434 if (arginfo[i].dt != vect_constant_def
3435 && arginfo[i].dt != vect_external_def)
3436 i = -1;
3437 break;
3438 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3439 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3440 if (arginfo[i].dt == vect_constant_def
3441 || arginfo[i].dt == vect_external_def
3442 || (arginfo[i].linear_step
3443 != n->simdclone->args[i].linear_step))
3444 i = -1;
3445 break;
3446 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3447 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3448 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3449 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3450 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3451 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3452 /* FORNOW */
3453 i = -1;
3454 break;
3455 case SIMD_CLONE_ARG_TYPE_MASK:
3456 gcc_unreachable ();
3457 }
3458 if (i == (size_t) -1)
3459 break;
3460 if (n->simdclone->args[i].alignment > arginfo[i].align)
3461 {
3462 i = -1;
3463 break;
3464 }
3465 if (arginfo[i].align)
3466 this_badness += (exact_log2 (arginfo[i].align)
3467 - exact_log2 (n->simdclone->args[i].alignment));
3468 }
3469 if (i == (size_t) -1)
3470 continue;
3471 if (bestn == NULL || this_badness < badness)
3472 {
3473 bestn = n;
3474 badness = this_badness;
3475 }
3476 }
3477
3478 if (bestn == NULL)
00426f9a 3479 return false;
0136f8f0
AH
3480
3481 for (i = 0; i < nargs; i++)
3482 if ((arginfo[i].dt == vect_constant_def
3483 || arginfo[i].dt == vect_external_def)
3484 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3485 {
3486 arginfo[i].vectype
3487 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3488 i)));
3489 if (arginfo[i].vectype == NULL
3490 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3491 > bestn->simdclone->simdlen))
00426f9a 3492 return false;
0136f8f0
AH
3493 }
3494
3495 fndecl = bestn->decl;
3496 nunits = bestn->simdclone->simdlen;
d9f21f6a 3497 ncopies = vf / nunits;
0136f8f0
AH
3498
3499 /* If the function isn't const, only allow it in simd loops where user
3500 has asserted that at least nunits consecutive iterations can be
3501 performed using SIMD instructions. */
3502 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3503 && gimple_vuse (stmt))
00426f9a 3504 return false;
0136f8f0
AH
3505
3506 /* Sanity check: make sure that at least one copy of the vectorized stmt
3507 needs to be generated. */
3508 gcc_assert (ncopies >= 1);
3509
3510 if (!vec_stmt) /* transformation not required. */
3511 {
6c9e85fb
JJ
3512 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3513 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3514 if ((bestn->simdclone->args[i].arg_type
3515 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3516 || (bestn->simdclone->args[i].arg_type
3517 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3518 {
17b658af 3519 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3520 + 1);
3521 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3522 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3523 ? size_type_node : TREE_TYPE (arginfo[i].op);
3524 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3525 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3526 tree sll = arginfo[i].simd_lane_linear
3527 ? boolean_true_node : boolean_false_node;
3528 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3529 }
0136f8f0
AH
3530 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3531 if (dump_enabled_p ())
3532 dump_printf_loc (MSG_NOTE, vect_location,
3533 "=== vectorizable_simd_clone_call ===\n");
3534/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3535 return true;
3536 }
3537
67b8dbac 3538 /* Transform. */
0136f8f0
AH
3539
3540 if (dump_enabled_p ())
3541 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3542
3543 /* Handle def. */
3544 scalar_dest = gimple_call_lhs (stmt);
3545 vec_dest = NULL_TREE;
3546 rtype = NULL_TREE;
3547 ratype = NULL_TREE;
3548 if (scalar_dest)
3549 {
3550 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3551 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3552 if (TREE_CODE (rtype) == ARRAY_TYPE)
3553 {
3554 ratype = rtype;
3555 rtype = TREE_TYPE (ratype);
3556 }
3557 }
3558
3559 prev_stmt_info = NULL;
3560 for (j = 0; j < ncopies; ++j)
3561 {
3562 /* Build argument list for the vectorized call. */
3563 if (j == 0)
3564 vargs.create (nargs);
3565 else
3566 vargs.truncate (0);
3567
3568 for (i = 0; i < nargs; i++)
3569 {
3570 unsigned int k, l, m, o;
3571 tree atype;
3572 op = gimple_call_arg (stmt, i);
3573 switch (bestn->simdclone->args[i].arg_type)
3574 {
3575 case SIMD_CLONE_ARG_TYPE_VECTOR:
3576 atype = bestn->simdclone->args[i].vector_type;
3577 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3578 for (m = j * o; m < (j + 1) * o; m++)
3579 {
3580 if (TYPE_VECTOR_SUBPARTS (atype)
3581 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3582 {
3583 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3584 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3585 / TYPE_VECTOR_SUBPARTS (atype));
3586 gcc_assert ((k & (k - 1)) == 0);
3587 if (m == 0)
3588 vec_oprnd0
81c40241 3589 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3590 else
3591 {
3592 vec_oprnd0 = arginfo[i].op;
3593 if ((m & (k - 1)) == 0)
3594 vec_oprnd0
3595 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3596 vec_oprnd0);
3597 }
3598 arginfo[i].op = vec_oprnd0;
3599 vec_oprnd0
3600 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3601 bitsize_int (prec),
0136f8f0
AH
3602 bitsize_int ((m & (k - 1)) * prec));
3603 new_stmt
b731b390 3604 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3605 vec_oprnd0);
3606 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3607 vargs.safe_push (gimple_assign_lhs (new_stmt));
3608 }
3609 else
3610 {
3611 k = (TYPE_VECTOR_SUBPARTS (atype)
3612 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3613 gcc_assert ((k & (k - 1)) == 0);
3614 vec<constructor_elt, va_gc> *ctor_elts;
3615 if (k != 1)
3616 vec_alloc (ctor_elts, k);
3617 else
3618 ctor_elts = NULL;
3619 for (l = 0; l < k; l++)
3620 {
3621 if (m == 0 && l == 0)
3622 vec_oprnd0
81c40241 3623 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3624 else
3625 vec_oprnd0
3626 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3627 arginfo[i].op);
3628 arginfo[i].op = vec_oprnd0;
3629 if (k == 1)
3630 break;
3631 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3632 vec_oprnd0);
3633 }
3634 if (k == 1)
3635 vargs.safe_push (vec_oprnd0);
3636 else
3637 {
3638 vec_oprnd0 = build_constructor (atype, ctor_elts);
3639 new_stmt
b731b390 3640 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3641 vec_oprnd0);
3642 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3643 vargs.safe_push (gimple_assign_lhs (new_stmt));
3644 }
3645 }
3646 }
3647 break;
3648 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3649 vargs.safe_push (op);
3650 break;
3651 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3652 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3653 if (j == 0)
3654 {
3655 gimple_seq stmts;
3656 arginfo[i].op
3657 = force_gimple_operand (arginfo[i].op, &stmts, true,
3658 NULL_TREE);
3659 if (stmts != NULL)
3660 {
3661 basic_block new_bb;
3662 edge pe = loop_preheader_edge (loop);
3663 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3664 gcc_assert (!new_bb);
3665 }
17b658af
JJ
3666 if (arginfo[i].simd_lane_linear)
3667 {
3668 vargs.safe_push (arginfo[i].op);
3669 break;
3670 }
b731b390 3671 tree phi_res = copy_ssa_name (op);
538dd0b7 3672 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3673 set_vinfo_for_stmt (new_phi,
310213d4 3674 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3675 add_phi_arg (new_phi, arginfo[i].op,
3676 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3677 enum tree_code code
3678 = POINTER_TYPE_P (TREE_TYPE (op))
3679 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3680 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3681 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3682 widest_int cst
3683 = wi::mul (bestn->simdclone->args[i].linear_step,
3684 ncopies * nunits);
3685 tree tcst = wide_int_to_tree (type, cst);
b731b390 3686 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3687 new_stmt
3688 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3689 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3690 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3691 set_vinfo_for_stmt (new_stmt,
310213d4 3692 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
3693 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3694 UNKNOWN_LOCATION);
3695 arginfo[i].op = phi_res;
3696 vargs.safe_push (phi_res);
3697 }
3698 else
3699 {
3700 enum tree_code code
3701 = POINTER_TYPE_P (TREE_TYPE (op))
3702 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3703 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3704 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3705 widest_int cst
3706 = wi::mul (bestn->simdclone->args[i].linear_step,
3707 j * nunits);
3708 tree tcst = wide_int_to_tree (type, cst);
b731b390 3709 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3710 new_stmt = gimple_build_assign (new_temp, code,
3711 arginfo[i].op, tcst);
0136f8f0
AH
3712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3713 vargs.safe_push (new_temp);
3714 }
3715 break;
7adb26f2
JJ
3716 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3717 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3718 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
3719 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3720 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3721 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3722 default:
3723 gcc_unreachable ();
3724 }
3725 }
3726
3727 new_stmt = gimple_build_call_vec (fndecl, vargs);
3728 if (vec_dest)
3729 {
3730 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3731 if (ratype)
b731b390 3732 new_temp = create_tmp_var (ratype);
0136f8f0
AH
3733 else if (TYPE_VECTOR_SUBPARTS (vectype)
3734 == TYPE_VECTOR_SUBPARTS (rtype))
3735 new_temp = make_ssa_name (vec_dest, new_stmt);
3736 else
3737 new_temp = make_ssa_name (rtype, new_stmt);
3738 gimple_call_set_lhs (new_stmt, new_temp);
3739 }
3740 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3741
3742 if (vec_dest)
3743 {
3744 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3745 {
3746 unsigned int k, l;
3747 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3748 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3749 gcc_assert ((k & (k - 1)) == 0);
3750 for (l = 0; l < k; l++)
3751 {
3752 tree t;
3753 if (ratype)
3754 {
3755 t = build_fold_addr_expr (new_temp);
3756 t = build2 (MEM_REF, vectype, t,
3757 build_int_cst (TREE_TYPE (t),
3758 l * prec / BITS_PER_UNIT));
3759 }
3760 else
3761 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 3762 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 3763 new_stmt
b731b390 3764 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3765 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3766 if (j == 0 && l == 0)
3767 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3768 else
3769 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3770
3771 prev_stmt_info = vinfo_for_stmt (new_stmt);
3772 }
3773
3774 if (ratype)
3775 {
3776 tree clobber = build_constructor (ratype, NULL);
3777 TREE_THIS_VOLATILE (clobber) = 1;
3778 new_stmt = gimple_build_assign (new_temp, clobber);
3779 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3780 }
3781 continue;
3782 }
3783 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3784 {
3785 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3786 / TYPE_VECTOR_SUBPARTS (rtype));
3787 gcc_assert ((k & (k - 1)) == 0);
3788 if ((j & (k - 1)) == 0)
3789 vec_alloc (ret_ctor_elts, k);
3790 if (ratype)
3791 {
3792 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3793 for (m = 0; m < o; m++)
3794 {
3795 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3796 size_int (m), NULL_TREE, NULL_TREE);
3797 new_stmt
b731b390 3798 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3799 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3800 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3801 gimple_assign_lhs (new_stmt));
3802 }
3803 tree clobber = build_constructor (ratype, NULL);
3804 TREE_THIS_VOLATILE (clobber) = 1;
3805 new_stmt = gimple_build_assign (new_temp, clobber);
3806 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3807 }
3808 else
3809 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3810 if ((j & (k - 1)) != k - 1)
3811 continue;
3812 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3813 new_stmt
b731b390 3814 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3815 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3816
3817 if ((unsigned) j == k - 1)
3818 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3819 else
3820 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3821
3822 prev_stmt_info = vinfo_for_stmt (new_stmt);
3823 continue;
3824 }
3825 else if (ratype)
3826 {
3827 tree t = build_fold_addr_expr (new_temp);
3828 t = build2 (MEM_REF, vectype, t,
3829 build_int_cst (TREE_TYPE (t), 0));
3830 new_stmt
b731b390 3831 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3832 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3833 tree clobber = build_constructor (ratype, NULL);
3834 TREE_THIS_VOLATILE (clobber) = 1;
3835 vect_finish_stmt_generation (stmt,
3836 gimple_build_assign (new_temp,
3837 clobber), gsi);
3838 }
3839 }
3840
3841 if (j == 0)
3842 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3843 else
3844 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3845
3846 prev_stmt_info = vinfo_for_stmt (new_stmt);
3847 }
3848
3849 vargs.release ();
3850
3851 /* The call in STMT might prevent it from being removed in dce.
3852 We however cannot remove it here, due to the way the ssa name
3853 it defines is mapped to the new definition. So just replace
3854 rhs of the statement with something harmless. */
3855
3856 if (slp_node)
3857 return true;
3858
3859 if (scalar_dest)
3860 {
3861 type = TREE_TYPE (scalar_dest);
3862 if (is_pattern_stmt_p (stmt_info))
3863 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3864 else
3865 lhs = gimple_call_lhs (stmt);
3866 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3867 }
3868 else
3869 new_stmt = gimple_build_nop ();
3870 set_vinfo_for_stmt (new_stmt, stmt_info);
3871 set_vinfo_for_stmt (stmt, NULL);
3872 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3873 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3874 unlink_stmt_vdef (stmt);
3875
3876 return true;
3877}
3878
3879
ebfd146a
IR
3880/* Function vect_gen_widened_results_half
3881
3882 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 3883 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3884 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
3885 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3886 needs to be created (DECL is a function-decl of a target-builtin).
3887 STMT is the original scalar stmt that we are vectorizing. */
3888
355fe088 3889static gimple *
ebfd146a
IR
3890vect_gen_widened_results_half (enum tree_code code,
3891 tree decl,
3892 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3893 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3894 gimple *stmt)
b8698a0f 3895{
355fe088 3896 gimple *new_stmt;
b8698a0f
L
3897 tree new_temp;
3898
3899 /* Generate half of the widened result: */
3900 if (code == CALL_EXPR)
3901 {
3902 /* Target specific support */
ebfd146a
IR
3903 if (op_type == binary_op)
3904 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3905 else
3906 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3907 new_temp = make_ssa_name (vec_dest, new_stmt);
3908 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3909 }
3910 else
ebfd146a 3911 {
b8698a0f
L
3912 /* Generic support */
3913 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3914 if (op_type != binary_op)
3915 vec_oprnd1 = NULL;
0d0e4a03 3916 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3917 new_temp = make_ssa_name (vec_dest, new_stmt);
3918 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3919 }
ebfd146a
IR
3920 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3921
ebfd146a
IR
3922 return new_stmt;
3923}
3924
4a00c761
JJ
3925
3926/* Get vectorized definitions for loop-based vectorization. For the first
3927 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3928 scalar operand), and for the rest we get a copy with
3929 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3930 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3931 The vectors are collected into VEC_OPRNDS. */
3932
3933static void
355fe088 3934vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3935 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3936{
3937 tree vec_oprnd;
3938
3939 /* Get first vector operand. */
3940 /* All the vector operands except the very first one (that is scalar oprnd)
3941 are stmt copies. */
3942 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3943 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
3944 else
3945 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3946
9771b263 3947 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3948
3949 /* Get second vector operand. */
3950 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3951 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3952
3953 *oprnd = vec_oprnd;
3954
3955 /* For conversion in multiple steps, continue to get operands
3956 recursively. */
3957 if (multi_step_cvt)
3958 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3959}
3960
3961
3962/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3963 For multi-step conversions store the resulting vectors and call the function
3964 recursively. */
3965
3966static void
9771b263 3967vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 3968 int multi_step_cvt, gimple *stmt,
9771b263 3969 vec<tree> vec_dsts,
4a00c761
JJ
3970 gimple_stmt_iterator *gsi,
3971 slp_tree slp_node, enum tree_code code,
3972 stmt_vec_info *prev_stmt_info)
3973{
3974 unsigned int i;
3975 tree vop0, vop1, new_tmp, vec_dest;
355fe088 3976 gimple *new_stmt;
4a00c761
JJ
3977 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3978
9771b263 3979 vec_dest = vec_dsts.pop ();
4a00c761 3980
9771b263 3981 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3982 {
3983 /* Create demotion operation. */
9771b263
DN
3984 vop0 = (*vec_oprnds)[i];
3985 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3986 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
3987 new_tmp = make_ssa_name (vec_dest, new_stmt);
3988 gimple_assign_set_lhs (new_stmt, new_tmp);
3989 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3990
3991 if (multi_step_cvt)
3992 /* Store the resulting vector for next recursive call. */
9771b263 3993 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3994 else
3995 {
3996 /* This is the last step of the conversion sequence. Store the
3997 vectors in SLP_NODE or in vector info of the scalar statement
3998 (or in STMT_VINFO_RELATED_STMT chain). */
3999 if (slp_node)
9771b263 4000 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4001 else
c689ce1e
RB
4002 {
4003 if (!*prev_stmt_info)
4004 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4005 else
4006 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4007
c689ce1e
RB
4008 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4009 }
4a00c761
JJ
4010 }
4011 }
4012
4013 /* For multi-step demotion operations we first generate demotion operations
4014 from the source type to the intermediate types, and then combine the
4015 results (stored in VEC_OPRNDS) in demotion operation to the destination
4016 type. */
4017 if (multi_step_cvt)
4018 {
4019 /* At each level of recursion we have half of the operands we had at the
4020 previous level. */
9771b263 4021 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4022 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4023 stmt, vec_dsts, gsi, slp_node,
4024 VEC_PACK_TRUNC_EXPR,
4025 prev_stmt_info);
4026 }
4027
9771b263 4028 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4029}
4030
4031
4032/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4033 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4034 the resulting vectors and call the function recursively. */
4035
4036static void
9771b263
DN
4037vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4038 vec<tree> *vec_oprnds1,
355fe088 4039 gimple *stmt, tree vec_dest,
4a00c761
JJ
4040 gimple_stmt_iterator *gsi,
4041 enum tree_code code1,
4042 enum tree_code code2, tree decl1,
4043 tree decl2, int op_type)
4044{
4045 int i;
4046 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4047 gimple *new_stmt1, *new_stmt2;
6e1aa848 4048 vec<tree> vec_tmp = vNULL;
4a00c761 4049
9771b263
DN
4050 vec_tmp.create (vec_oprnds0->length () * 2);
4051 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4052 {
4053 if (op_type == binary_op)
9771b263 4054 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4055 else
4056 vop1 = NULL_TREE;
4057
4058 /* Generate the two halves of promotion operation. */
4059 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4060 op_type, vec_dest, gsi, stmt);
4061 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4062 op_type, vec_dest, gsi, stmt);
4063 if (is_gimple_call (new_stmt1))
4064 {
4065 new_tmp1 = gimple_call_lhs (new_stmt1);
4066 new_tmp2 = gimple_call_lhs (new_stmt2);
4067 }
4068 else
4069 {
4070 new_tmp1 = gimple_assign_lhs (new_stmt1);
4071 new_tmp2 = gimple_assign_lhs (new_stmt2);
4072 }
4073
4074 /* Store the results for the next step. */
9771b263
DN
4075 vec_tmp.quick_push (new_tmp1);
4076 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4077 }
4078
689eaba3 4079 vec_oprnds0->release ();
4a00c761
JJ
4080 *vec_oprnds0 = vec_tmp;
4081}
4082
4083
b8698a0f
L
4084/* Check if STMT performs a conversion operation, that can be vectorized.
4085 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4086 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4087 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4088
4089static bool
355fe088
TS
4090vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4091 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4092{
4093 tree vec_dest;
4094 tree scalar_dest;
4a00c761 4095 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4096 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4099 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4100 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4101 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4102 tree new_temp;
355fe088 4103 gimple *def_stmt;
ebfd146a 4104 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4105 int ndts = 2;
355fe088 4106 gimple *new_stmt = NULL;
ebfd146a
IR
4107 stmt_vec_info prev_stmt_info;
4108 int nunits_in;
4109 int nunits_out;
4110 tree vectype_out, vectype_in;
4a00c761
JJ
4111 int ncopies, i, j;
4112 tree lhs_type, rhs_type;
ebfd146a 4113 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4114 vec<tree> vec_oprnds0 = vNULL;
4115 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4116 tree vop0;
4a00c761 4117 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4118 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4119 int multi_step_cvt = 0;
6e1aa848 4120 vec<tree> interm_types = vNULL;
4a00c761
JJ
4121 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4122 int op_type;
4a00c761 4123 unsigned short fltsz;
ebfd146a
IR
4124
4125 /* Is STMT a vectorizable conversion? */
4126
4a00c761 4127 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4128 return false;
4129
66c16fd9
RB
4130 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4131 && ! vec_stmt)
ebfd146a
IR
4132 return false;
4133
4134 if (!is_gimple_assign (stmt))
4135 return false;
4136
4137 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4138 return false;
4139
4140 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4141 if (!CONVERT_EXPR_CODE_P (code)
4142 && code != FIX_TRUNC_EXPR
4143 && code != FLOAT_EXPR
4144 && code != WIDEN_MULT_EXPR
4145 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4146 return false;
4147
4a00c761
JJ
4148 op_type = TREE_CODE_LENGTH (code);
4149
ebfd146a 4150 /* Check types of lhs and rhs. */
b690cc0f 4151 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4152 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4153 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4154
ebfd146a
IR
4155 op0 = gimple_assign_rhs1 (stmt);
4156 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4157
4158 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4159 && !((INTEGRAL_TYPE_P (lhs_type)
4160 && INTEGRAL_TYPE_P (rhs_type))
4161 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4162 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4163 return false;
4164
e6f5c25d
IE
4165 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4166 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4167 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4168 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4169 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4170 {
73fbfcad 4171 if (dump_enabled_p ())
78c60e3d 4172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4173 "type conversion to/from bit-precision unsupported."
4174 "\n");
4a00c761
JJ
4175 return false;
4176 }
4177
b690cc0f 4178 /* Check the operands of the operation. */
81c40241 4179 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4180 {
73fbfcad 4181 if (dump_enabled_p ())
78c60e3d 4182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4183 "use not simple.\n");
b690cc0f
RG
4184 return false;
4185 }
4a00c761
JJ
4186 if (op_type == binary_op)
4187 {
4188 bool ok;
4189
4190 op1 = gimple_assign_rhs2 (stmt);
4191 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4192 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4193 OP1. */
4194 if (CONSTANT_CLASS_P (op0))
81c40241 4195 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4196 else
81c40241 4197 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4198
4199 if (!ok)
4200 {
73fbfcad 4201 if (dump_enabled_p ())
78c60e3d 4202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4203 "use not simple.\n");
4a00c761
JJ
4204 return false;
4205 }
4206 }
4207
b690cc0f
RG
4208 /* If op0 is an external or constant defs use a vector type of
4209 the same size as the output vector type. */
ebfd146a 4210 if (!vectype_in)
b690cc0f 4211 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4212 if (vec_stmt)
4213 gcc_assert (vectype_in);
4214 if (!vectype_in)
4215 {
73fbfcad 4216 if (dump_enabled_p ())
4a00c761 4217 {
78c60e3d
SS
4218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4219 "no vectype for scalar type ");
4220 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4221 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4222 }
7d8930a0
IR
4223
4224 return false;
4225 }
ebfd146a 4226
e6f5c25d
IE
4227 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4228 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4229 {
4230 if (dump_enabled_p ())
4231 {
4232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4233 "can't convert between boolean and non "
4234 "boolean vectors");
4235 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4236 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4237 }
4238
4239 return false;
4240 }
4241
b690cc0f
RG
4242 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4243 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 4244 if (nunits_in < nunits_out)
ebfd146a
IR
4245 modifier = NARROW;
4246 else if (nunits_out == nunits_in)
4247 modifier = NONE;
ebfd146a 4248 else
4a00c761 4249 modifier = WIDEN;
ebfd146a 4250
ff802fa1
IR
4251 /* Multiple types in SLP are handled by creating the appropriate number of
4252 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4253 case of SLP. */
fce57248 4254 if (slp_node)
ebfd146a 4255 ncopies = 1;
4a00c761 4256 else if (modifier == NARROW)
e8f142e2 4257 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4258 else
e8f142e2 4259 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4260
ebfd146a
IR
4261 /* Sanity check: make sure that at least one copy of the vectorized stmt
4262 needs to be generated. */
4263 gcc_assert (ncopies >= 1);
4264
16d22000
RS
4265 bool found_mode = false;
4266 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4267 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4268 opt_scalar_mode rhs_mode_iter;
b397965c 4269
ebfd146a 4270 /* Supportable by target? */
4a00c761 4271 switch (modifier)
ebfd146a 4272 {
4a00c761
JJ
4273 case NONE:
4274 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4275 return false;
4276 if (supportable_convert_operation (code, vectype_out, vectype_in,
4277 &decl1, &code1))
4278 break;
4279 /* FALLTHRU */
4280 unsupported:
73fbfcad 4281 if (dump_enabled_p ())
78c60e3d 4282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4283 "conversion not supported by target.\n");
ebfd146a 4284 return false;
ebfd146a 4285
4a00c761
JJ
4286 case WIDEN:
4287 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4288 &code1, &code2, &multi_step_cvt,
4289 &interm_types))
4a00c761
JJ
4290 {
4291 /* Binary widening operation can only be supported directly by the
4292 architecture. */
4293 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4294 break;
4295 }
4296
4297 if (code != FLOAT_EXPR
b397965c 4298 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4299 goto unsupported;
4300
b397965c 4301 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4302 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4303 {
16d22000 4304 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4305 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4306 break;
4307
4a00c761
JJ
4308 cvt_type
4309 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4310 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4311 if (cvt_type == NULL_TREE)
4312 goto unsupported;
4313
4314 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4315 {
4316 if (!supportable_convert_operation (code, vectype_out,
4317 cvt_type, &decl1, &codecvt1))
4318 goto unsupported;
4319 }
4320 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4321 cvt_type, &codecvt1,
4322 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4323 &interm_types))
4324 continue;
4325 else
4326 gcc_assert (multi_step_cvt == 0);
4327
4328 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4329 vectype_in, &code1, &code2,
4330 &multi_step_cvt, &interm_types))
16d22000
RS
4331 {
4332 found_mode = true;
4333 break;
4334 }
4a00c761
JJ
4335 }
4336
16d22000 4337 if (!found_mode)
4a00c761
JJ
4338 goto unsupported;
4339
4340 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4341 codecvt2 = ERROR_MARK;
4342 else
4343 {
4344 multi_step_cvt++;
9771b263 4345 interm_types.safe_push (cvt_type);
4a00c761
JJ
4346 cvt_type = NULL_TREE;
4347 }
4348 break;
4349
4350 case NARROW:
4351 gcc_assert (op_type == unary_op);
4352 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4353 &code1, &multi_step_cvt,
4354 &interm_types))
4355 break;
4356
4357 if (code != FIX_TRUNC_EXPR
b397965c 4358 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4359 goto unsupported;
4360
4a00c761
JJ
4361 cvt_type
4362 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4363 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4364 if (cvt_type == NULL_TREE)
4365 goto unsupported;
4366 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4367 &decl1, &codecvt1))
4368 goto unsupported;
4369 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4370 &code1, &multi_step_cvt,
4371 &interm_types))
4372 break;
4373 goto unsupported;
4374
4375 default:
4376 gcc_unreachable ();
ebfd146a
IR
4377 }
4378
4379 if (!vec_stmt) /* transformation not required. */
4380 {
73fbfcad 4381 if (dump_enabled_p ())
78c60e3d 4382 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4383 "=== vectorizable_conversion ===\n");
4a00c761 4384 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4385 {
4386 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4fc5ebf1 4387 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4388 }
4a00c761
JJ
4389 else if (modifier == NARROW)
4390 {
4391 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 4392 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4393 }
4394 else
4395 {
4396 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 4397 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4398 }
9771b263 4399 interm_types.release ();
ebfd146a
IR
4400 return true;
4401 }
4402
67b8dbac 4403 /* Transform. */
73fbfcad 4404 if (dump_enabled_p ())
78c60e3d 4405 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4406 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4407
4a00c761
JJ
4408 if (op_type == binary_op)
4409 {
4410 if (CONSTANT_CLASS_P (op0))
4411 op0 = fold_convert (TREE_TYPE (op1), op0);
4412 else if (CONSTANT_CLASS_P (op1))
4413 op1 = fold_convert (TREE_TYPE (op0), op1);
4414 }
4415
4416 /* In case of multi-step conversion, we first generate conversion operations
4417 to the intermediate types, and then from that types to the final one.
4418 We create vector destinations for the intermediate type (TYPES) received
4419 from supportable_*_operation, and store them in the correct order
4420 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4421 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4422 vec_dest = vect_create_destination_var (scalar_dest,
4423 (cvt_type && modifier == WIDEN)
4424 ? cvt_type : vectype_out);
9771b263 4425 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4426
4427 if (multi_step_cvt)
4428 {
9771b263
DN
4429 for (i = interm_types.length () - 1;
4430 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4431 {
4432 vec_dest = vect_create_destination_var (scalar_dest,
4433 intermediate_type);
9771b263 4434 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4435 }
4436 }
ebfd146a 4437
4a00c761 4438 if (cvt_type)
82294ec1
JJ
4439 vec_dest = vect_create_destination_var (scalar_dest,
4440 modifier == WIDEN
4441 ? vectype_out : cvt_type);
4a00c761
JJ
4442
4443 if (!slp_node)
4444 {
30862efc 4445 if (modifier == WIDEN)
4a00c761 4446 {
c3284718 4447 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4448 if (op_type == binary_op)
9771b263 4449 vec_oprnds1.create (1);
4a00c761 4450 }
30862efc 4451 else if (modifier == NARROW)
9771b263
DN
4452 vec_oprnds0.create (
4453 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4454 }
4455 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4456 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4457
4a00c761 4458 last_oprnd = op0;
ebfd146a
IR
4459 prev_stmt_info = NULL;
4460 switch (modifier)
4461 {
4462 case NONE:
4463 for (j = 0; j < ncopies; j++)
4464 {
ebfd146a 4465 if (j == 0)
306b0c92 4466 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4467 else
4468 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4469
9771b263 4470 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4471 {
4472 /* Arguments are ready, create the new vector stmt. */
4473 if (code1 == CALL_EXPR)
4474 {
4475 new_stmt = gimple_build_call (decl1, 1, vop0);
4476 new_temp = make_ssa_name (vec_dest, new_stmt);
4477 gimple_call_set_lhs (new_stmt, new_temp);
4478 }
4479 else
4480 {
4481 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4482 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4483 new_temp = make_ssa_name (vec_dest, new_stmt);
4484 gimple_assign_set_lhs (new_stmt, new_temp);
4485 }
4486
4487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4488 if (slp_node)
9771b263 4489 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4490 else
4491 {
4492 if (!prev_stmt_info)
4493 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4494 else
4495 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4496 prev_stmt_info = vinfo_for_stmt (new_stmt);
4497 }
4a00c761 4498 }
ebfd146a
IR
4499 }
4500 break;
4501
4502 case WIDEN:
4503 /* In case the vectorization factor (VF) is bigger than the number
4504 of elements that we can fit in a vectype (nunits), we have to
4505 generate more than one vector stmt - i.e - we need to "unroll"
4506 the vector stmt by a factor VF/nunits. */
4507 for (j = 0; j < ncopies; j++)
4508 {
4a00c761 4509 /* Handle uses. */
ebfd146a 4510 if (j == 0)
4a00c761
JJ
4511 {
4512 if (slp_node)
4513 {
4514 if (code == WIDEN_LSHIFT_EXPR)
4515 {
4516 unsigned int k;
ebfd146a 4517
4a00c761
JJ
4518 vec_oprnd1 = op1;
4519 /* Store vec_oprnd1 for every vector stmt to be created
4520 for SLP_NODE. We check during the analysis that all
4521 the shift arguments are the same. */
4522 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4523 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4524
4525 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4526 slp_node);
4a00c761
JJ
4527 }
4528 else
4529 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4530 &vec_oprnds1, slp_node);
4a00c761
JJ
4531 }
4532 else
4533 {
81c40241 4534 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4535 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4536 if (op_type == binary_op)
4537 {
4538 if (code == WIDEN_LSHIFT_EXPR)
4539 vec_oprnd1 = op1;
4540 else
81c40241 4541 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4542 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4543 }
4544 }
4545 }
ebfd146a 4546 else
4a00c761
JJ
4547 {
4548 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4549 vec_oprnds0.truncate (0);
4550 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4551 if (op_type == binary_op)
4552 {
4553 if (code == WIDEN_LSHIFT_EXPR)
4554 vec_oprnd1 = op1;
4555 else
4556 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4557 vec_oprnd1);
9771b263
DN
4558 vec_oprnds1.truncate (0);
4559 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4560 }
4561 }
ebfd146a 4562
4a00c761
JJ
4563 /* Arguments are ready. Create the new vector stmts. */
4564 for (i = multi_step_cvt; i >= 0; i--)
4565 {
9771b263 4566 tree this_dest = vec_dsts[i];
4a00c761
JJ
4567 enum tree_code c1 = code1, c2 = code2;
4568 if (i == 0 && codecvt2 != ERROR_MARK)
4569 {
4570 c1 = codecvt1;
4571 c2 = codecvt2;
4572 }
4573 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4574 &vec_oprnds1,
4575 stmt, this_dest, gsi,
4576 c1, c2, decl1, decl2,
4577 op_type);
4578 }
4579
9771b263 4580 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4581 {
4582 if (cvt_type)
4583 {
4584 if (codecvt1 == CALL_EXPR)
4585 {
4586 new_stmt = gimple_build_call (decl1, 1, vop0);
4587 new_temp = make_ssa_name (vec_dest, new_stmt);
4588 gimple_call_set_lhs (new_stmt, new_temp);
4589 }
4590 else
4591 {
4592 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4593 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4594 new_stmt = gimple_build_assign (new_temp, codecvt1,
4595 vop0);
4a00c761
JJ
4596 }
4597
4598 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4599 }
4600 else
4601 new_stmt = SSA_NAME_DEF_STMT (vop0);
4602
4603 if (slp_node)
9771b263 4604 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4605 else
c689ce1e
RB
4606 {
4607 if (!prev_stmt_info)
4608 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4609 else
4610 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4611 prev_stmt_info = vinfo_for_stmt (new_stmt);
4612 }
4a00c761 4613 }
ebfd146a 4614 }
4a00c761
JJ
4615
4616 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4617 break;
4618
4619 case NARROW:
4620 /* In case the vectorization factor (VF) is bigger than the number
4621 of elements that we can fit in a vectype (nunits), we have to
4622 generate more than one vector stmt - i.e - we need to "unroll"
4623 the vector stmt by a factor VF/nunits. */
4624 for (j = 0; j < ncopies; j++)
4625 {
4626 /* Handle uses. */
4a00c761
JJ
4627 if (slp_node)
4628 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4629 slp_node);
ebfd146a
IR
4630 else
4631 {
9771b263 4632 vec_oprnds0.truncate (0);
4a00c761
JJ
4633 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4634 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4635 }
4636
4a00c761
JJ
4637 /* Arguments are ready. Create the new vector stmts. */
4638 if (cvt_type)
9771b263 4639 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4640 {
4641 if (codecvt1 == CALL_EXPR)
4642 {
4643 new_stmt = gimple_build_call (decl1, 1, vop0);
4644 new_temp = make_ssa_name (vec_dest, new_stmt);
4645 gimple_call_set_lhs (new_stmt, new_temp);
4646 }
4647 else
4648 {
4649 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4650 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4651 new_stmt = gimple_build_assign (new_temp, codecvt1,
4652 vop0);
4a00c761 4653 }
ebfd146a 4654
4a00c761 4655 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4656 vec_oprnds0[i] = new_temp;
4a00c761 4657 }
ebfd146a 4658
4a00c761
JJ
4659 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4660 stmt, vec_dsts, gsi,
4661 slp_node, code1,
4662 &prev_stmt_info);
ebfd146a
IR
4663 }
4664
4665 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4666 break;
ebfd146a
IR
4667 }
4668
9771b263
DN
4669 vec_oprnds0.release ();
4670 vec_oprnds1.release ();
9771b263 4671 interm_types.release ();
ebfd146a
IR
4672
4673 return true;
4674}
ff802fa1
IR
4675
4676
ebfd146a
IR
4677/* Function vectorizable_assignment.
4678
b8698a0f
L
4679 Check if STMT performs an assignment (copy) that can be vectorized.
4680 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4681 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4682 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4683
4684static bool
355fe088
TS
4685vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4686 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4687{
4688 tree vec_dest;
4689 tree scalar_dest;
4690 tree op;
4691 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
4692 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4693 tree new_temp;
355fe088 4694 gimple *def_stmt;
4fc5ebf1
JG
4695 enum vect_def_type dt[1] = {vect_unknown_def_type};
4696 int ndts = 1;
ebfd146a 4697 int ncopies;
f18b55bd 4698 int i, j;
6e1aa848 4699 vec<tree> vec_oprnds = vNULL;
ebfd146a 4700 tree vop;
a70d6342 4701 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4702 vec_info *vinfo = stmt_info->vinfo;
355fe088 4703 gimple *new_stmt = NULL;
f18b55bd 4704 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4705 enum tree_code code;
4706 tree vectype_in;
ebfd146a 4707
a70d6342 4708 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4709 return false;
4710
66c16fd9
RB
4711 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4712 && ! vec_stmt)
ebfd146a
IR
4713 return false;
4714
4715 /* Is vectorizable assignment? */
4716 if (!is_gimple_assign (stmt))
4717 return false;
4718
4719 scalar_dest = gimple_assign_lhs (stmt);
4720 if (TREE_CODE (scalar_dest) != SSA_NAME)
4721 return false;
4722
fde9c428 4723 code = gimple_assign_rhs_code (stmt);
ebfd146a 4724 if (gimple_assign_single_p (stmt)
fde9c428
RG
4725 || code == PAREN_EXPR
4726 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4727 op = gimple_assign_rhs1 (stmt);
4728 else
4729 return false;
4730
7b7ec6c5
RG
4731 if (code == VIEW_CONVERT_EXPR)
4732 op = TREE_OPERAND (op, 0);
4733
465c8c19
JJ
4734 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4735 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4736
4737 /* Multiple types in SLP are handled by creating the appropriate number of
4738 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4739 case of SLP. */
fce57248 4740 if (slp_node)
465c8c19
JJ
4741 ncopies = 1;
4742 else
e8f142e2 4743 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
4744
4745 gcc_assert (ncopies >= 1);
4746
81c40241 4747 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4748 {
73fbfcad 4749 if (dump_enabled_p ())
78c60e3d 4750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4751 "use not simple.\n");
ebfd146a
IR
4752 return false;
4753 }
4754
fde9c428
RG
4755 /* We can handle NOP_EXPR conversions that do not change the number
4756 of elements or the vector size. */
7b7ec6c5
RG
4757 if ((CONVERT_EXPR_CODE_P (code)
4758 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
4759 && (!vectype_in
4760 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4761 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4762 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4763 return false;
4764
7b7b1813
RG
4765 /* We do not handle bit-precision changes. */
4766 if ((CONVERT_EXPR_CODE_P (code)
4767 || code == VIEW_CONVERT_EXPR)
4768 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
4769 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4770 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
4771 /* But a conversion that does not change the bit-pattern is ok. */
4772 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4773 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
4774 && TYPE_UNSIGNED (TREE_TYPE (op)))
4775 /* Conversion between boolean types of different sizes is
4776 a simple assignment in case their vectypes are the same
4777 boolean vectors. */
4778 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4779 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4780 {
73fbfcad 4781 if (dump_enabled_p ())
78c60e3d
SS
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4783 "type conversion to/from bit-precision "
e645e942 4784 "unsupported.\n");
7b7b1813
RG
4785 return false;
4786 }
4787
ebfd146a
IR
4788 if (!vec_stmt) /* transformation not required. */
4789 {
4790 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4791 if (dump_enabled_p ())
78c60e3d 4792 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4793 "=== vectorizable_assignment ===\n");
4fc5ebf1 4794 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
4795 return true;
4796 }
4797
67b8dbac 4798 /* Transform. */
73fbfcad 4799 if (dump_enabled_p ())
e645e942 4800 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4801
4802 /* Handle def. */
4803 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4804
4805 /* Handle use. */
f18b55bd 4806 for (j = 0; j < ncopies; j++)
ebfd146a 4807 {
f18b55bd
IR
4808 /* Handle uses. */
4809 if (j == 0)
306b0c92 4810 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
4811 else
4812 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4813
4814 /* Arguments are ready. create the new vector stmt. */
9771b263 4815 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4816 {
7b7ec6c5
RG
4817 if (CONVERT_EXPR_CODE_P (code)
4818 || code == VIEW_CONVERT_EXPR)
4a73490d 4819 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4820 new_stmt = gimple_build_assign (vec_dest, vop);
4821 new_temp = make_ssa_name (vec_dest, new_stmt);
4822 gimple_assign_set_lhs (new_stmt, new_temp);
4823 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4824 if (slp_node)
9771b263 4825 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4826 }
ebfd146a
IR
4827
4828 if (slp_node)
f18b55bd
IR
4829 continue;
4830
4831 if (j == 0)
4832 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4833 else
4834 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4835
4836 prev_stmt_info = vinfo_for_stmt (new_stmt);
4837 }
b8698a0f 4838
9771b263 4839 vec_oprnds.release ();
ebfd146a
IR
4840 return true;
4841}
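/* Illustrative examples of statements accepted by
   vectorizable_assignment (a sketch, not an exhaustive list):

     q[i] = p[i];                          <-- plain copy
     u_2 = (unsigned int) s_1;             <-- NOP_EXPR, same size/count
     f_4 = VIEW_CONVERT_EXPR<float>(w_3);  <-- bit-preserving punning

   Each becomes one vector copy per generated statement, with the rhs
   wrapped in a VIEW_CONVERT_EXPR to the destination vector type when
   the scalar statement was a conversion.  */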
4842
9dc3f7de 4843
1107f3ae
IR
4844/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4845 either as shift by a scalar or by a vector. */
4846
4847bool
4848vect_supportable_shift (enum tree_code code, tree scalar_type)
4849{
4850
ef4bddc2 4851 machine_mode vec_mode;
1107f3ae
IR
4852 optab optab;
4853 int icode;
4854 tree vectype;
4855
4856 vectype = get_vectype_for_scalar_type (scalar_type);
4857 if (!vectype)
4858 return false;
4859
4860 optab = optab_for_tree_code (code, vectype, optab_scalar);
4861 if (!optab
4862 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4863 {
4864 optab = optab_for_tree_code (code, vectype, optab_vector);
4865 if (!optab
4866 || (optab_handler (optab, TYPE_MODE (vectype))
4867 == CODE_FOR_nothing))
4868 return false;
4869 }
4870
4871 vec_mode = TYPE_MODE (vectype);
4872 icode = (int) optab_handler (optab, vec_mode);
4873 if (icode == CODE_FOR_nothing)
4874 return false;
4875
4876 return true;
4877}
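/* Usage sketch for vect_supportable_shift (illustrative only; ITYPE
   and the surrounding pattern code are hypothetical): a pattern
   recognizer can check

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ... synthesize the statement that needs the shift ...

   before committing to a pattern.  The helper answers true when either
   the vector-by-scalar or the vector-by-vector shift optab is
   implemented for the corresponding vector mode.  */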
4878
4879
9dc3f7de
IR
4880/* Function vectorizable_shift.
4881
4882 Check if STMT performs a shift operation that can be vectorized.
4883 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4884 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4885 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4886
4887static bool
355fe088
TS
4888vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4889 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
4890{
4891 tree vec_dest;
4892 tree scalar_dest;
4893 tree op0, op1 = NULL;
4894 tree vec_oprnd1 = NULL_TREE;
4895 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4896 tree vectype;
4897 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4898 enum tree_code code;
ef4bddc2 4899 machine_mode vec_mode;
9dc3f7de
IR
4900 tree new_temp;
4901 optab optab;
4902 int icode;
ef4bddc2 4903 machine_mode optab_op2_mode;
355fe088 4904 gimple *def_stmt;
9dc3f7de 4905 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4906 int ndts = 2;
355fe088 4907 gimple *new_stmt = NULL;
9dc3f7de
IR
4908 stmt_vec_info prev_stmt_info;
4909 int nunits_in;
4910 int nunits_out;
4911 tree vectype_out;
cede2577 4912 tree op1_vectype;
9dc3f7de
IR
4913 int ncopies;
4914 int j, i;
6e1aa848
DN
4915 vec<tree> vec_oprnds0 = vNULL;
4916 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4917 tree vop0, vop1;
4918 unsigned int k;
49eab32e 4919 bool scalar_shift_arg = true;
9dc3f7de 4920 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4921 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
4922
4923 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4924 return false;
4925
66c16fd9
RB
4926 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4927 && ! vec_stmt)
9dc3f7de
IR
4928 return false;
4929
4930 /* Is STMT a vectorizable binary/unary operation? */
4931 if (!is_gimple_assign (stmt))
4932 return false;
4933
4934 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4935 return false;
4936
4937 code = gimple_assign_rhs_code (stmt);
4938
4939 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4940 || code == RROTATE_EXPR))
4941 return false;
4942
4943 scalar_dest = gimple_assign_lhs (stmt);
4944 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 4945 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 4946 {
73fbfcad 4947 if (dump_enabled_p ())
78c60e3d 4948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4949 "bit-precision shifts not supported.\n");
7b7b1813
RG
4950 return false;
4951 }
9dc3f7de
IR
4952
4953 op0 = gimple_assign_rhs1 (stmt);
81c40241 4954 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4955 {
73fbfcad 4956 if (dump_enabled_p ())
78c60e3d 4957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4958 "use not simple.\n");
9dc3f7de
IR
4959 return false;
4960 }
4961 /* If op0 is an external or constant def use a vector type with
4962 the same size as the output vector type. */
4963 if (!vectype)
4964 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4965 if (vec_stmt)
4966 gcc_assert (vectype);
4967 if (!vectype)
4968 {
73fbfcad 4969 if (dump_enabled_p ())
78c60e3d 4970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4971 "no vectype for scalar type\n");
9dc3f7de
IR
4972 return false;
4973 }
4974
4975 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4976 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4977 if (nunits_out != nunits_in)
4978 return false;
4979
4980 op1 = gimple_assign_rhs2 (stmt);
81c40241 4981 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 4982 {
73fbfcad 4983 if (dump_enabled_p ())
78c60e3d 4984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4985 "use not simple.\n");
9dc3f7de
IR
4986 return false;
4987 }
4988
9dc3f7de
IR
4989 /* Multiple types in SLP are handled by creating the appropriate number of
4990 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4991 case of SLP. */
fce57248 4992 if (slp_node)
9dc3f7de
IR
4993 ncopies = 1;
4994 else
e8f142e2 4995 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
4996
4997 gcc_assert (ncopies >= 1);
4998
4999 /* Determine whether the shift amount is a vector, or scalar. If the
5000 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5001
dbfa87aa
YR
5002 if ((dt[1] == vect_internal_def
5003 || dt[1] == vect_induction_def)
5004 && !slp_node)
49eab32e
JJ
5005 scalar_shift_arg = false;
5006 else if (dt[1] == vect_constant_def
5007 || dt[1] == vect_external_def
5008 || dt[1] == vect_internal_def)
5009 {
5010 /* In SLP, we need to check whether the shift count is the same for
5011 all statements; in loops, if it is a constant or invariant, it is
5012 always a scalar shift. */
5013 if (slp_node)
5014 {
355fe088
TS
5015 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5016 gimple *slpstmt;
49eab32e 5017
9771b263 5018 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5019 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5020 scalar_shift_arg = false;
5021 }
60d393e8
RB
5022
5023 /* If the shift amount is computed by a pattern stmt we cannot
5024 use the scalar amount directly, so give up and use a vector
5025 shift. */
5026 if (dt[1] == vect_internal_def)
5027 {
5028 gimple *def = SSA_NAME_DEF_STMT (op1);
5029 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5030 scalar_shift_arg = false;
5031 }
49eab32e
JJ
5032 }
5033 else
5034 {
73fbfcad 5035 if (dump_enabled_p ())
78c60e3d 5036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5037 "operand mode requires invariant argument.\n");
49eab32e
JJ
5038 return false;
5039 }
5040
9dc3f7de 5041 /* Vector shifted by vector. */
49eab32e 5042 if (!scalar_shift_arg)
9dc3f7de
IR
5043 {
5044 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5045 if (dump_enabled_p ())
78c60e3d 5046 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5047 "vector/vector shift/rotate found.\n");
78c60e3d 5048
aa948027
JJ
5049 if (!op1_vectype)
5050 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5051 if (op1_vectype == NULL_TREE
5052 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5053 {
73fbfcad 5054 if (dump_enabled_p ())
78c60e3d
SS
5055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5056 "unusable type for last operand in"
e645e942 5057 " vector/vector shift/rotate.\n");
cede2577
JJ
5058 return false;
5059 }
9dc3f7de
IR
5060 }
5061 /* See if the machine has a vector shifted by scalar insn and if not
5062 then see if it has a vector shifted by vector insn. */
49eab32e 5063 else
9dc3f7de
IR
5064 {
5065 optab = optab_for_tree_code (code, vectype, optab_scalar);
5066 if (optab
5067 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5068 {
73fbfcad 5069 if (dump_enabled_p ())
78c60e3d 5070 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5071 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5072 }
5073 else
5074 {
5075 optab = optab_for_tree_code (code, vectype, optab_vector);
5076 if (optab
5077 && (optab_handler (optab, TYPE_MODE (vectype))
5078 != CODE_FOR_nothing))
5079 {
49eab32e
JJ
5080 scalar_shift_arg = false;
5081
73fbfcad 5082 if (dump_enabled_p ())
78c60e3d 5083 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5084 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5085
5086 /* Unlike the other binary operators, shifts/rotates have
5087 the rhs being int, instead of the same type as the lhs,
5088 so make sure the scalar is the right type if we are
aa948027 5089 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5090 if (dt[1] == vect_constant_def)
5091 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5092 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5093 TREE_TYPE (op1)))
5094 {
5095 if (slp_node
5096 && TYPE_MODE (TREE_TYPE (vectype))
5097 != TYPE_MODE (TREE_TYPE (op1)))
5098 {
73fbfcad 5099 if (dump_enabled_p ())
78c60e3d
SS
5100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5101 "unusable type for last operand in"
e645e942 5102 " vector/vector shift/rotate.\n");
21c0a521 5103 return false;
aa948027
JJ
5104 }
5105 if (vec_stmt && !slp_node)
5106 {
5107 op1 = fold_convert (TREE_TYPE (vectype), op1);
5108 op1 = vect_init_vector (stmt, op1,
5109 TREE_TYPE (vectype), NULL);
5110 }
5111 }
9dc3f7de
IR
5112 }
5113 }
5114 }
9dc3f7de
IR
5115
5116 /* Supportable by target? */
5117 if (!optab)
5118 {
73fbfcad 5119 if (dump_enabled_p ())
78c60e3d 5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5121 "no optab.\n");
9dc3f7de
IR
5122 return false;
5123 }
5124 vec_mode = TYPE_MODE (vectype);
5125 icode = (int) optab_handler (optab, vec_mode);
5126 if (icode == CODE_FOR_nothing)
5127 {
73fbfcad 5128 if (dump_enabled_p ())
78c60e3d 5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5130 "op not supported by target.\n");
9dc3f7de
IR
5131 /* Check only during analysis. */
5132 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb
RS
5133 || (!vec_stmt
5134 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5135 return false;
73fbfcad 5136 if (dump_enabled_p ())
e645e942
TJ
5137 dump_printf_loc (MSG_NOTE, vect_location,
5138 "proceeding using word mode.\n");
9dc3f7de
IR
5139 }
5140
5141 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5142 if (!vec_stmt
5143 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5144 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5145 {
73fbfcad 5146 if (dump_enabled_p ())
78c60e3d 5147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5148 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5149 return false;
5150 }
5151
5152 if (!vec_stmt) /* transformation not required. */
5153 {
5154 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5155 if (dump_enabled_p ())
e645e942
TJ
5156 dump_printf_loc (MSG_NOTE, vect_location,
5157 "=== vectorizable_shift ===\n");
4fc5ebf1 5158 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5159 return true;
5160 }
5161
67b8dbac 5162 /* Transform. */
9dc3f7de 5163
73fbfcad 5164 if (dump_enabled_p ())
78c60e3d 5165 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5166 "transform binary/unary operation.\n");
9dc3f7de
IR
5167
5168 /* Handle def. */
5169 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5170
9dc3f7de
IR
5171 prev_stmt_info = NULL;
5172 for (j = 0; j < ncopies; j++)
5173 {
5174 /* Handle uses. */
5175 if (j == 0)
5176 {
5177 if (scalar_shift_arg)
5178 {
5179 /* Vector shl and shr insn patterns can be defined with scalar
5180 operand 2 (shift operand). In this case, use constant or loop
5181 invariant op1 directly, without extending it to vector mode
5182 first. */
5183 optab_op2_mode = insn_data[icode].operand[2].mode;
5184 if (!VECTOR_MODE_P (optab_op2_mode))
5185 {
73fbfcad 5186 if (dump_enabled_p ())
78c60e3d 5187 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5188 "operand 1 using scalar mode.\n");
9dc3f7de 5189 vec_oprnd1 = op1;
8930f723 5190 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5191 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5192 if (slp_node)
5193 {
5194 /* Store vec_oprnd1 for every vector stmt to be created
5195 for SLP_NODE. We check during the analysis that all
5196 the shift arguments are the same.
5197 TODO: Allow different constants for different vector
5198 stmts generated for an SLP instance. */
5199 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5200 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5201 }
5202 }
5203 }
5204
5205 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5206 (a special case for certain kinds of vector shifts); otherwise,
5207 operand 1 should be of a vector type (the usual case). */
5208 if (vec_oprnd1)
5209 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5210 slp_node);
9dc3f7de
IR
5211 else
5212 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5213 slp_node);
9dc3f7de
IR
5214 }
5215 else
5216 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5217
5218 /* Arguments are ready. Create the new vector stmt. */
9771b263 5219 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5220 {
9771b263 5221 vop1 = vec_oprnds1[i];
0d0e4a03 5222 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5223 new_temp = make_ssa_name (vec_dest, new_stmt);
5224 gimple_assign_set_lhs (new_stmt, new_temp);
5225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5226 if (slp_node)
9771b263 5227 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5228 }
5229
5230 if (slp_node)
5231 continue;
5232
5233 if (j == 0)
5234 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5235 else
5236 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5237 prev_stmt_info = vinfo_for_stmt (new_stmt);
5238 }
5239
9771b263
DN
5240 vec_oprnds0.release ();
5241 vec_oprnds1.release ();
9dc3f7de
IR
5242
5243 return true;
5244}
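/* Illustrative contrast of the two shift forms handled above
   (a sketch only):

     for (i = 0; i < n; i++)
       a[i] = b[i] << 3;        <-- invariant amount: vector/scalar shift

     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];     <-- varying amount: vector/vector shift

   In the first loop the amount can stay a scalar operand when the
   target's shift pattern accepts one (scalar_shift_arg); in the second
   each element is shifted by the matching element of the vectorized C,
   which requires the vector/vector optab.  */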
5245
5246
ebfd146a
IR
5247/* Function vectorizable_operation.
5248
16949072
RG
5249 Check if STMT performs a binary, unary or ternary operation that can
5250 be vectorized.
b8698a0f 5251 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5252 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5253 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5254
5255static bool
355fe088
TS
5256vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5257 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5258{
00f07b86 5259 tree vec_dest;
ebfd146a 5260 tree scalar_dest;
16949072 5261 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5262 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5263 tree vectype;
ebfd146a 5264 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5265 enum tree_code code, orig_code;
ef4bddc2 5266 machine_mode vec_mode;
ebfd146a
IR
5267 tree new_temp;
5268 int op_type;
00f07b86 5269 optab optab;
523ba738 5270 bool target_support_p;
355fe088 5271 gimple *def_stmt;
16949072
RG
5272 enum vect_def_type dt[3]
5273 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5274 int ndts = 3;
355fe088 5275 gimple *new_stmt = NULL;
ebfd146a 5276 stmt_vec_info prev_stmt_info;
b690cc0f 5277 int nunits_in;
ebfd146a
IR
5278 int nunits_out;
5279 tree vectype_out;
5280 int ncopies;
5281 int j, i;
6e1aa848
DN
5282 vec<tree> vec_oprnds0 = vNULL;
5283 vec<tree> vec_oprnds1 = vNULL;
5284 vec<tree> vec_oprnds2 = vNULL;
16949072 5285 tree vop0, vop1, vop2;
a70d6342 5286 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5287 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5288
a70d6342 5289 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5290 return false;
5291
66c16fd9
RB
5292 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5293 && ! vec_stmt)
ebfd146a
IR
5294 return false;
5295
5296 /* Is STMT a vectorizable binary/unary operation? */
5297 if (!is_gimple_assign (stmt))
5298 return false;
5299
5300 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5301 return false;
5302
0eb952ea 5303 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5304
1af4ebf5
MG
5305 /* For pointer addition and subtraction, we should use the normal
5306 plus and minus for the vector operation. */
ebfd146a
IR
5307 if (code == POINTER_PLUS_EXPR)
5308 code = PLUS_EXPR;
1af4ebf5
MG
5309 if (code == POINTER_DIFF_EXPR)
5310 code = MINUS_EXPR;
ebfd146a
IR
5311
5312 /* Support only unary or binary operations. */
5313 op_type = TREE_CODE_LENGTH (code);
16949072 5314 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5315 {
73fbfcad 5316 if (dump_enabled_p ())
78c60e3d 5317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5318 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5319 op_type);
ebfd146a
IR
5320 return false;
5321 }
5322
b690cc0f
RG
5323 scalar_dest = gimple_assign_lhs (stmt);
5324 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5325
7b7b1813
RG
5326 /* Most operations cannot handle bit-precision types without extra
5327 truncations. */
045c1278 5328 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5329 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5330 /* Exception are bitwise binary operations. */
5331 && code != BIT_IOR_EXPR
5332 && code != BIT_XOR_EXPR
5333 && code != BIT_AND_EXPR)
5334 {
73fbfcad 5335 if (dump_enabled_p ())
78c60e3d 5336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5337 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5338 return false;
5339 }
5340
ebfd146a 5341 op0 = gimple_assign_rhs1 (stmt);
81c40241 5342 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5343 {
73fbfcad 5344 if (dump_enabled_p ())
78c60e3d 5345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5346 "use not simple.\n");
ebfd146a
IR
5347 return false;
5348 }
b690cc0f
RG
5349 /* If op0 is an external or constant def use a vector type with
5350 the same size as the output vector type. */
5351 if (!vectype)
b036c6c5
IE
5352 {
5353 /* For a boolean type we cannot determine the vectype from an
5354 invariant value (we don't know whether it is a vector
5355 of booleans or a vector of integers). We use the output
5356 vectype because operations on booleans don't change
5357 the type. */
2568d8a1 5358 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5359 {
2568d8a1 5360 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5361 {
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5364 "not supported operation on bool value.\n");
5365 return false;
5366 }
5367 vectype = vectype_out;
5368 }
5369 else
5370 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5371 }
7d8930a0
IR
5372 if (vec_stmt)
5373 gcc_assert (vectype);
5374 if (!vectype)
5375 {
73fbfcad 5376 if (dump_enabled_p ())
7d8930a0 5377 {
78c60e3d
SS
5378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5379 "no vectype for scalar type ");
5380 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5381 TREE_TYPE (op0));
e645e942 5382 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5383 }
5384
5385 return false;
5386 }
b690cc0f
RG
5387
5388 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5389 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5390 if (nunits_out != nunits_in)
5391 return false;
ebfd146a 5392
16949072 5393 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5394 {
5395 op1 = gimple_assign_rhs2 (stmt);
81c40241 5396 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5397 {
73fbfcad 5398 if (dump_enabled_p ())
78c60e3d 5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5400 "use not simple.\n");
ebfd146a
IR
5401 return false;
5402 }
5403 }
16949072
RG
5404 if (op_type == ternary_op)
5405 {
5406 op2 = gimple_assign_rhs3 (stmt);
81c40241 5407 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5408 {
73fbfcad 5409 if (dump_enabled_p ())
78c60e3d 5410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5411 "use not simple.\n");
16949072
RG
5412 return false;
5413 }
5414 }
ebfd146a 5415
b690cc0f 5416 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5417 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5418 case of SLP. */
fce57248 5419 if (slp_node)
b690cc0f
RG
5420 ncopies = 1;
5421 else
e8f142e2 5422 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5423
5424 gcc_assert (ncopies >= 1);
5425
9dc3f7de 5426 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5427 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5428 || code == RROTATE_EXPR)
9dc3f7de 5429 return false;
ebfd146a 5430
ebfd146a 5431 /* Supportable by target? */
00f07b86
RH
5432
5433 vec_mode = TYPE_MODE (vectype);
5434 if (code == MULT_HIGHPART_EXPR)
523ba738 5435 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5436 else
5437 {
5438 optab = optab_for_tree_code (code, vectype, optab_default);
5439 if (!optab)
5deb57cb 5440 {
73fbfcad 5441 if (dump_enabled_p ())
78c60e3d 5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5443 "no optab.\n");
00f07b86 5444 return false;
5deb57cb 5445 }
523ba738
RS
5446 target_support_p = (optab_handler (optab, vec_mode)
5447 != CODE_FOR_nothing);
5deb57cb
JJ
5448 }
5449
523ba738 5450 if (!target_support_p)
ebfd146a 5451 {
73fbfcad 5452 if (dump_enabled_p ())
78c60e3d 5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5454 "op not supported by target.\n");
ebfd146a
IR
5455 /* Check only during analysis. */
5456 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb 5457 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5458 return false;
73fbfcad 5459 if (dump_enabled_p ())
e645e942
TJ
5460 dump_printf_loc (MSG_NOTE, vect_location,
5461 "proceeding using word mode.\n");
383d9c83
IR
5462 }
5463
4a00c761 5464 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5465 if (!VECTOR_MODE_P (vec_mode)
5466 && !vec_stmt
ca09abcb 5467 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5468 {
73fbfcad 5469 if (dump_enabled_p ())
78c60e3d 5470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5471 "not worthwhile without SIMD support.\n");
e34842c6 5472 return false;
7d8930a0 5473 }
ebfd146a 5474
ebfd146a
IR
5475 if (!vec_stmt) /* transformation not required. */
5476 {
4a00c761 5477 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5478 if (dump_enabled_p ())
78c60e3d 5479 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5480 "=== vectorizable_operation ===\n");
4fc5ebf1 5481 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5482 return true;
5483 }
5484
67b8dbac 5485 /* Transform. */
ebfd146a 5486
73fbfcad 5487 if (dump_enabled_p ())
78c60e3d 5488 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5489 "transform binary/unary operation.\n");
383d9c83 5490
ebfd146a 5491 /* Handle def. */
00f07b86 5492 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5493
0eb952ea
JJ
5494 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5495 vectors with unsigned elements, but the result is signed. So, we
5496 need to compute the MINUS_EXPR into vectype temporary and
5497 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5498 tree vec_cvt_dest = NULL_TREE;
5499 if (orig_code == POINTER_DIFF_EXPR)
5500 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5501
ebfd146a
IR
5502 /* In case the vectorization factor (VF) is bigger than the number
5503 of elements that we can fit in a vectype (nunits), we have to generate
5504 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
5505 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5506 from one copy of the vector stmt to the next, in the field
5507 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5508 stages to find the correct vector defs to be used when vectorizing
5509 stmts that use the defs of the current stmt. The example below
5510 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5511 we need to create 4 vectorized stmts):
5512
5513 before vectorization:
5514 RELATED_STMT VEC_STMT
5515 S1: x = memref - -
5516 S2: z = x + 1 - -
5517
5518 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5519 there):
5520 RELATED_STMT VEC_STMT
5521 VS1_0: vx0 = memref0 VS1_1 -
5522 VS1_1: vx1 = memref1 VS1_2 -
5523 VS1_2: vx2 = memref2 VS1_3 -
5524 VS1_3: vx3 = memref3 - -
5525 S1: x = load - VS1_0
5526 S2: z = x + 1 - -
5527
5528 step2: vectorize stmt S2 (done here):
5529 To vectorize stmt S2 we first need to find the relevant vector
5530 def for the first operand 'x'. This is, as usual, obtained from
5531 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5532 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5533 relevant vector def 'vx0'. Having found 'vx0' we can generate
5534 the vector stmt VS2_0, and as usual, record it in the
5535 STMT_VINFO_VEC_STMT of stmt S2.
5536 When creating the second copy (VS2_1), we obtain the relevant vector
5537 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5538 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5539 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5540 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5541 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5542 chain of stmts and pointers:
5543 RELATED_STMT VEC_STMT
5544 VS1_0: vx0 = memref0 VS1_1 -
5545 VS1_1: vx1 = memref1 VS1_2 -
5546 VS1_2: vx2 = memref2 VS1_3 -
5547 VS1_3: vx3 = memref3 - -
5548 S1: x = load - VS1_0
5549 VS2_0: vz0 = vx0 + v1 VS2_1 -
5550 VS2_1: vz1 = vx1 + v1 VS2_2 -
5551 VS2_2: vz2 = vx2 + v1 VS2_3 -
5552 VS2_3: vz3 = vx3 + v1 - -
5553 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5554
5555 prev_stmt_info = NULL;
5556 for (j = 0; j < ncopies; j++)
5557 {
5558 /* Handle uses. */
5559 if (j == 0)
4a00c761
JJ
5560 {
5561 if (op_type == binary_op || op_type == ternary_op)
5562 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5563 slp_node);
4a00c761
JJ
5564 else
5565 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5566 slp_node);
4a00c761 5567 if (op_type == ternary_op)
c392943c 5568 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5569 slp_node);
4a00c761 5570 }
ebfd146a 5571 else
4a00c761
JJ
5572 {
5573 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5574 if (op_type == ternary_op)
5575 {
9771b263
DN
5576 tree vec_oprnd = vec_oprnds2.pop ();
5577 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5578 vec_oprnd));
4a00c761
JJ
5579 }
5580 }
5581
5582 /* Arguments are ready. Create the new vector stmt. */
9771b263 5583 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5584 {
4a00c761 5585 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5586 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5587 vop2 = ((op_type == ternary_op)
9771b263 5588 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5589 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5590 new_temp = make_ssa_name (vec_dest, new_stmt);
5591 gimple_assign_set_lhs (new_stmt, new_temp);
5592 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5593 if (vec_cvt_dest)
5594 {
5595 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5596 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5597 new_temp);
5598 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5599 gimple_assign_set_lhs (new_stmt, new_temp);
5600 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5601 }
4a00c761 5602 if (slp_node)
9771b263 5603 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5604 }
5605
4a00c761
JJ
5606 if (slp_node)
5607 continue;
5608
5609 if (j == 0)
5610 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5611 else
5612 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5613 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5614 }
5615
9771b263
DN
5616 vec_oprnds0.release ();
5617 vec_oprnds1.release ();
5618 vec_oprnds2.release ();
ebfd146a 5619
ebfd146a
IR
5620 return true;
5621}
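/* Illustrative example of the POINTER_DIFF_EXPR handling above
   (a sketch): for

     ptrdiff_t d = p - q;

   the subtraction is carried out on vectors of unsigned elements and
   the signed result is produced by a separate VIEW_CONVERT_EXPR:

     vect_t.0 = vect_p.1 - vect_q.2;                       <-- in VECTYPE
     vect_d.3 = VIEW_CONVERT_EXPR<VECTYPE_OUT>(vect_t.0);  */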
5622
f702e7d4 5623/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5624
5625static void
f702e7d4 5626ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5627{
5628 if (!dr->aux)
5629 return;
5630
52639a61 5631 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5632 {
52639a61 5633 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5634
f702e7d4
RS
5635 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5636
428f0c67 5637 if (decl_in_symtab_p (base_decl))
f702e7d4 5638 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
5639 else
5640 {
f702e7d4 5641 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
5642 DECL_USER_ALIGN (base_decl) = 1;
5643 }
52639a61 5644 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5645 }
5646}
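/* For example (illustrative): a file-scope

     static double a[256];

   that is only 8-byte aligned can have its DECL_ALIGN raised here to
   the data reference's DR_TARGET_ALIGNMENT (say 16 or 32 bytes), so
   that vectorized accesses to it can be emitted as aligned.  */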
5647
ebfd146a 5648
44fc7854
BE
5649/* Function get_group_alias_ptr_type.
5650
5651 Return the alias type for the group starting at FIRST_STMT. */
5652
5653static tree
5654get_group_alias_ptr_type (gimple *first_stmt)
5655{
5656 struct data_reference *first_dr, *next_dr;
5657 gimple *next_stmt;
5658
5659 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5660 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5661 while (next_stmt)
5662 {
5663 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5664 if (get_alias_set (DR_REF (first_dr))
5665 != get_alias_set (DR_REF (next_dr)))
5666 {
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_NOTE, vect_location,
5669 "conflicting alias set types.\n");
5670 return ptr_type_node;
5671 }
5672 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5673 }
5674 return reference_alias_ptr_type (DR_REF (first_dr));
5675}
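/* For instance (illustrative), if one store of the group writes the
   buffer through an "int *" view and another through a "float *"
   view, the alias sets of the two DR_REFs differ and the conservative
   ptr_type_node is returned for the whole group; otherwise the group
   reuses the alias pointer type of its first data reference.  */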
5676
5677
ebfd146a
IR
5678/* Function vectorizable_store.
5679
b8698a0f
L
5680 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5681 can be vectorized.
5682 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5683 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5684 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5685
5686static bool
355fe088 5687vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5688 slp_tree slp_node)
ebfd146a
IR
5689{
5690 tree scalar_dest;
5691 tree data_ref;
5692 tree op;
5693 tree vec_oprnd = NULL_TREE;
5694 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5695 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5696 tree elem_type;
ebfd146a 5697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5698 struct loop *loop = NULL;
ef4bddc2 5699 machine_mode vec_mode;
ebfd146a
IR
5700 tree dummy;
5701 enum dr_alignment_support alignment_support_scheme;
355fe088 5702 gimple *def_stmt;
ebfd146a
IR
5703 enum vect_def_type dt;
5704 stmt_vec_info prev_stmt_info = NULL;
5705 tree dataref_ptr = NULL_TREE;
74bf76ed 5706 tree dataref_offset = NULL_TREE;
355fe088 5707 gimple *ptr_incr = NULL;
ebfd146a
IR
5708 int ncopies;
5709 int j;
2de001ee
RS
5710 gimple *next_stmt, *first_stmt;
5711 bool grouped_store;
ebfd146a 5712 unsigned int group_size, i;
6e1aa848
DN
5713 vec<tree> oprnds = vNULL;
5714 vec<tree> result_chain = vNULL;
ebfd146a 5715 bool inv_p;
09dfa495 5716 tree offset = NULL_TREE;
6e1aa848 5717 vec<tree> vec_oprnds = vNULL;
ebfd146a 5718 bool slp = (slp_node != NULL);
ebfd146a 5719 unsigned int vec_num;
a70d6342 5720 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5721 vec_info *vinfo = stmt_info->vinfo;
272c6793 5722 tree aggr_type;
134c85ca 5723 gather_scatter_info gs_info;
3bab6342 5724 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5725 gimple *new_stmt;
d9f21f6a 5726 poly_uint64 vf;
2de001ee 5727 vec_load_store_type vls_type;
44fc7854 5728 tree ref_type;
a70d6342 5729
a70d6342 5730 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5731 return false;
5732
66c16fd9
RB
5733 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5734 && ! vec_stmt)
ebfd146a
IR
5735 return false;
5736
5737 /* Is vectorizable store? */
5738
5739 if (!is_gimple_assign (stmt))
5740 return false;
5741
5742 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5743 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5744 && is_pattern_stmt_p (stmt_info))
5745 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5746 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5747 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5748 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5749 && TREE_CODE (scalar_dest) != COMPONENT_REF
5750 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5751 && TREE_CODE (scalar_dest) != REALPART_EXPR
5752 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5753 return false;
5754
fce57248
RS
5755 /* Cannot have hybrid store SLP -- that would mean storing to the
5756 same location twice. */
5757 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5758
ebfd146a 5759 gcc_assert (gimple_assign_single_p (stmt));
465c8c19 5760
f4d09712 5761 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 5762 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5763
5764 if (loop_vinfo)
b17dc4d4
RB
5765 {
5766 loop = LOOP_VINFO_LOOP (loop_vinfo);
5767 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5768 }
5769 else
5770 vf = 1;
465c8c19
JJ
5771
5772 /* Multiple types in SLP are handled by creating the appropriate number of
5773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5774 case of SLP. */
fce57248 5775 if (slp)
465c8c19
JJ
5776 ncopies = 1;
5777 else
e8f142e2 5778 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5779
5780 gcc_assert (ncopies >= 1);
5781
5782 /* FORNOW. This restriction should be relaxed. */
5783 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5784 {
5785 if (dump_enabled_p ())
5786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5787 "multiple types in nested loop.\n");
5788 return false;
5789 }
5790
ebfd146a 5791 op = gimple_assign_rhs1 (stmt);
f4d09712 5792
2f391428 5793 /* In case this is a store from a constant, make sure
11a82e25 5794 native_encode_expr can handle it. */
2f391428 5795 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
11a82e25
RB
5796 return false;
5797
f4d09712 5798 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
ebfd146a 5799 {
73fbfcad 5800 if (dump_enabled_p ())
78c60e3d 5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5802 "use not simple.\n");
ebfd146a
IR
5803 return false;
5804 }
5805
2de001ee
RS
5806 if (dt == vect_constant_def || dt == vect_external_def)
5807 vls_type = VLS_STORE_INVARIANT;
5808 else
5809 vls_type = VLS_STORE;
5810
f4d09712
KY
5811 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5812 return false;
5813
272c6793 5814 elem_type = TREE_TYPE (vectype);
ebfd146a 5815 vec_mode = TYPE_MODE (vectype);
7b7b1813 5816
ebfd146a
IR
5817 /* FORNOW. In some cases can vectorize even if data-type not supported
5818 (e.g. - array initialization with 0). */
947131ba 5819 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5820 return false;
5821
5822 if (!STMT_VINFO_DATA_REF (stmt_info))
5823 return false;
5824
2de001ee 5825 vect_memory_access_type memory_access_type;
62da9e14 5826 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
2de001ee
RS
5827 &memory_access_type, &gs_info))
5828 return false;
3bab6342 5829
ebfd146a
IR
5830 if (!vec_stmt) /* transformation not required. */
5831 {
2de001ee 5832 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 5833 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5834 /* The SLP costs are calculated during SLP analysis. */
5835 if (!PURE_SLP_STMT (stmt_info))
2de001ee 5836 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
2e8ab70c 5837 NULL, NULL, NULL);
ebfd146a
IR
5838 return true;
5839 }
2de001ee 5840 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 5841
67b8dbac 5842 /* Transform. */
ebfd146a 5843
f702e7d4 5844 ensure_base_align (dr);
c716e67f 5845
2de001ee 5846 if (memory_access_type == VMAT_GATHER_SCATTER)
3bab6342
AT
5847 {
5848 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
134c85ca 5849 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
5850 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5851 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5852 edge pe = loop_preheader_edge (loop);
5853 gimple_seq seq;
5854 basic_block new_bb;
5855 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
5856 poly_uint64 scatter_off_nunits
5857 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 5858
4d694b27 5859 if (known_eq (nunits, scatter_off_nunits))
3bab6342 5860 modifier = NONE;
4d694b27 5861 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 5862 {
3bab6342
AT
5863 modifier = WIDEN;
5864
4d694b27
RS
5865 /* Currently gathers and scatters are only supported for
5866 fixed-length vectors. */
5867 unsigned int count = scatter_off_nunits.to_constant ();
5868 vec_perm_builder sel (count, count, 1);
5869 for (i = 0; i < (unsigned int) count; ++i)
5870 sel.quick_push (i | (count / 2));
3bab6342 5871
4d694b27 5872 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
5873 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5874 indices);
3bab6342
AT
5875 gcc_assert (perm_mask != NULL_TREE);
5876 }
4d694b27 5877 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 5878 {
3bab6342
AT
5879 modifier = NARROW;
5880
4d694b27
RS
5881 /* Currently gathers and scatters are only supported for
5882 fixed-length vectors. */
5883 unsigned int count = nunits.to_constant ();
5884 vec_perm_builder sel (count, count, 1);
5885 for (i = 0; i < (unsigned int) count; ++i)
5886 sel.quick_push (i | (count / 2));
3bab6342 5887
4d694b27 5888 vec_perm_indices indices (sel, 2, count);
e3342de4 5889 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
5890 gcc_assert (perm_mask != NULL_TREE);
5891 ncopies *= 2;
5892 }
5893 else
5894 gcc_unreachable ();
5895
134c85ca 5896 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
5897 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5898 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5899 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5900 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5901 scaletype = TREE_VALUE (arglist);
5902
5903 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5904 && TREE_CODE (rettype) == VOID_TYPE);
5905
134c85ca 5906 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
5907 if (!is_gimple_min_invariant (ptr))
5908 {
5909 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5910 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5911 gcc_assert (!new_bb);
5912 }
5913
5914 /* Currently we support only unconditional scatter stores,
5915 so mask should be all ones. */
5916 mask = build_int_cst (masktype, -1);
5917 mask = vect_init_vector (stmt, mask, masktype, NULL);
5918
134c85ca 5919 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
5920
5921 prev_stmt_info = NULL;
5922 for (j = 0; j < ncopies; ++j)
5923 {
5924 if (j == 0)
5925 {
5926 src = vec_oprnd1
81c40241 5927 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
3bab6342 5928 op = vec_oprnd0
134c85ca 5929 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
5930 }
5931 else if (modifier != NONE && (j & 1))
5932 {
5933 if (modifier == WIDEN)
5934 {
5935 src = vec_oprnd1
5936 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5937 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5938 stmt, gsi);
5939 }
5940 else if (modifier == NARROW)
5941 {
5942 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5943 stmt, gsi);
5944 op = vec_oprnd0
134c85ca
RS
5945 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5946 vec_oprnd0);
3bab6342
AT
5947 }
5948 else
5949 gcc_unreachable ();
5950 }
5951 else
5952 {
5953 src = vec_oprnd1
5954 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5955 op = vec_oprnd0
134c85ca
RS
5956 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5957 vec_oprnd0);
3bab6342
AT
5958 }
5959
5960 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5961 {
5962 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5963 == TYPE_VECTOR_SUBPARTS (srctype));
0e22bb5a 5964 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
5965 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5966 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5967 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5968 src = var;
5969 }
5970
5971 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5972 {
5973 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5974 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 5975 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
5976 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5977 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5979 op = var;
5980 }
5981
5982 new_stmt
134c85ca 5983 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
5984
5985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5986
5987 if (prev_stmt_info == NULL)
5988 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5989 else
5990 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5991 prev_stmt_info = vinfo_for_stmt (new_stmt);
5992 }
5993 return true;
5994 }
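/* Illustrative source pattern for the scatter path above (a sketch):

     for (i = 0; i < n; i++)
       a[idx[i]] = x[i];

   The store becomes a call to the target's scatter built-in
   (gs_info.decl) taking the base pointer, an all-ones mask, the
   vectorized offsets, the vectorized rhs and the scale, with extra
   permutes when the offset vector and the data vector have different
   numbers of elements (the WIDEN/NARROW cases above).  */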
5995
2de001ee 5996 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
0d0293ac 5997 if (grouped_store)
ebfd146a 5998 {
2de001ee 5999 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 6000 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6001 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 6002
e14c1050 6003 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
6004
6005 /* FORNOW */
a70d6342 6006 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6007
6008 /* We vectorize all the stmts of the interleaving group when we
6009 reach the last stmt in the group. */
e14c1050
IR
6010 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6011 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6012 && !slp)
6013 {
6014 *vec_stmt = NULL;
6015 return true;
6016 }
6017
6018 if (slp)
4b5caab7 6019 {
0d0293ac 6020 grouped_store = false;
4b5caab7
IR
6021 /* VEC_NUM is the number of vect stmts to be created for this
6022 group. */
6023 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6024 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 6025 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6026 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 6027 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 6028 }
ebfd146a 6029 else
4b5caab7
IR
6030 /* VEC_NUM is the number of vect stmts to be created for this
6031 group. */
ebfd146a 6032 vec_num = group_size;
44fc7854
BE
6033
6034 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6035 }
b8698a0f 6036 else
ebfd146a
IR
6037 {
6038 first_stmt = stmt;
6039 first_dr = dr;
6040 group_size = vec_num = 1;
44fc7854 6041 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a 6042 }
b8698a0f 6043
73fbfcad 6044 if (dump_enabled_p ())
78c60e3d 6045 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6046 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6047
2de001ee
RS
6048 if (memory_access_type == VMAT_ELEMENTWISE
6049 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6050 {
6051 gimple_stmt_iterator incr_gsi;
6052 bool insert_after;
355fe088 6053 gimple *incr;
f2e2a985
MM
6054 tree offvar;
6055 tree ivstep;
6056 tree running_off;
6057 gimple_seq stmts = NULL;
6058 tree stride_base, stride_step, alias_off;
6059 tree vec_oprnd;
f502d50e 6060 unsigned int g;
4d694b27
RS
6061 /* Checked by get_load_store_type. */
6062 unsigned int const_nunits = nunits.to_constant ();
f2e2a985
MM
6063
6064 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6065
6066 stride_base
6067 = fold_build_pointer_plus
f502d50e 6068 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 6069 size_binop (PLUS_EXPR,
f502d50e 6070 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
44fc7854 6071 convert_to_ptrofftype (DR_INIT (first_dr))));
f502d50e 6072 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
6073
6074 /* For a store with loop-invariant (but other than power-of-2)
6075 stride (i.e. not a grouped access) like so:
6076
6077 for (i = 0; i < n; i += stride)
6078 array[i] = ...;
6079
6080 we generate a new induction variable and new stores from
6081 the components of the (vectorized) rhs:
6082
6083 for (j = 0; ; j += VF*stride)
6084 vectemp = ...;
6085 tmp1 = vectemp[0];
6086 array[j] = tmp1;
6087 tmp2 = vectemp[1];
6088 array[j + stride] = tmp2;
6089 ...
6090 */
6091
4d694b27 6092 unsigned nstores = const_nunits;
b17dc4d4 6093 unsigned lnel = 1;
cee62fee 6094 tree ltype = elem_type;
04199738 6095 tree lvectype = vectype;
cee62fee
MM
6096 if (slp)
6097 {
4d694b27
RS
6098 if (group_size < const_nunits
6099 && const_nunits % group_size == 0)
b17dc4d4 6100 {
4d694b27 6101 nstores = const_nunits / group_size;
b17dc4d4
RB
6102 lnel = group_size;
6103 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6104 lvectype = vectype;
6105
6106 /* First check if vec_extract optab doesn't support extraction
6107 of vector elts directly. */
b397965c 6108 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6109 machine_mode vmode;
6110 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6111 || !VECTOR_MODE_P (vmode)
04199738
RB
6112 || (convert_optab_handler (vec_extract_optab,
6113 TYPE_MODE (vectype), vmode)
6114 == CODE_FOR_nothing))
6115 {
6116 /* Try to avoid emitting an extract of vector elements
6117 by performing the extracts using an integer type of the
6118 same size, extracting from a vector of those and then
6119 re-interpreting it as the original vector type if
6120 supported. */
6121 unsigned lsize
6122 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6123 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6124 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6125 /* If we can't construct such a vector fall back to
6126 element extracts from the original vector type and
6127 element size stores. */
4d694b27 6128 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6129 && VECTOR_MODE_P (vmode)
04199738
RB
6130 && (convert_optab_handler (vec_extract_optab,
6131 vmode, elmode)
6132 != CODE_FOR_nothing))
6133 {
4d694b27 6134 nstores = lnunits;
04199738
RB
6135 lnel = group_size;
6136 ltype = build_nonstandard_integer_type (lsize, 1);
6137 lvectype = build_vector_type (ltype, nstores);
6138 }
6139 /* Else fall back to vector extraction anyway.
6140 Fewer stores are more important than avoiding spilling
6141 of the vector we extract from. Compared to the
6142 construction case in vectorizable_load no store-forwarding
6143 issue exists here for reasonable archs. */
6144 }
b17dc4d4 6145 }
4d694b27
RS
6146 else if (group_size >= const_nunits
6147 && group_size % const_nunits == 0)
b17dc4d4
RB
6148 {
6149 nstores = 1;
4d694b27 6150 lnel = const_nunits;
b17dc4d4 6151 ltype = vectype;
04199738 6152 lvectype = vectype;
b17dc4d4 6153 }
cee62fee
MM
6154 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6155 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6156 }
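/* Illustrative numbers for the SLP case above (a sketch): with a
   V8HI vectype (const_nunits == 8) and an SLP group of 2 stores,
   nstores becomes 4 and lnel 2, i.e. each generated store writes a
   two-element chunk, either as a V2HI vector or, when vec_extract
   cannot extract V2HI from V8HI, as a 32-bit integer carrying the
   same bits.  */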
6157
f2e2a985
MM
6158 ivstep = stride_step;
6159 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6160 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6161
6162 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6163
6164 create_iv (stride_base, ivstep, NULL,
6165 loop, &incr_gsi, insert_after,
6166 &offvar, NULL);
6167 incr = gsi_stmt (incr_gsi);
310213d4 6168 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
6169
6170 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6171 if (stmts)
6172 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6173
6174 prev_stmt_info = NULL;
44fc7854 6175 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6176 next_stmt = first_stmt;
6177 for (g = 0; g < group_size; g++)
f2e2a985 6178 {
f502d50e
MM
6179 running_off = offvar;
6180 if (g)
f2e2a985 6181 {
f502d50e
MM
6182 tree size = TYPE_SIZE_UNIT (ltype);
6183 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6184 size);
f502d50e 6185 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6186 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6187 running_off, pos);
f2e2a985 6188 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6189 running_off = newoff;
f502d50e 6190 }
b17dc4d4
RB
6191 unsigned int group_el = 0;
6192 unsigned HOST_WIDE_INT
6193 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6194 for (j = 0; j < ncopies; j++)
6195 {
6196 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6197 and first_stmt == stmt. */
6198 if (j == 0)
6199 {
6200 if (slp)
6201 {
6202 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6203 slp_node);
f502d50e
MM
6204 vec_oprnd = vec_oprnds[0];
6205 }
6206 else
6207 {
6208 gcc_assert (gimple_assign_single_p (next_stmt));
6209 op = gimple_assign_rhs1 (next_stmt);
81c40241 6210 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6211 }
6212 }
f2e2a985 6213 else
f502d50e
MM
6214 {
6215 if (slp)
6216 vec_oprnd = vec_oprnds[j];
6217 else
c079cbac 6218 {
81c40241 6219 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
6220 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6221 }
f502d50e 6222 }
04199738
RB
6223 /* Pun the vector to extract from if necessary. */
6224 if (lvectype != vectype)
6225 {
6226 tree tem = make_ssa_name (lvectype);
6227 gimple *pun
6228 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6229 lvectype, vec_oprnd));
6230 vect_finish_stmt_generation (stmt, pun, gsi);
6231 vec_oprnd = tem;
6232 }
f502d50e
MM
6233 for (i = 0; i < nstores; i++)
6234 {
6235 tree newref, newoff;
355fe088 6236 gimple *incr, *assign;
f502d50e
MM
6237 tree size = TYPE_SIZE (ltype);
6238 /* Extract the i'th component. */
6239 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6240 bitsize_int (i), size);
6241 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6242 size, pos);
6243
6244 elem = force_gimple_operand_gsi (gsi, elem, true,
6245 NULL_TREE, true,
6246 GSI_SAME_STMT);
6247
b17dc4d4
RB
6248 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6249 group_el * elsz);
f502d50e 6250 newref = build2 (MEM_REF, ltype,
b17dc4d4 6251 running_off, this_off);
f502d50e
MM
6252
6253 /* And store it to *running_off. */
6254 assign = gimple_build_assign (newref, elem);
6255 vect_finish_stmt_generation (stmt, assign, gsi);
6256
b17dc4d4
RB
6257 group_el += lnel;
6258 if (! slp
6259 || group_el == group_size)
6260 {
6261 newoff = copy_ssa_name (running_off, NULL);
6262 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6263 running_off, stride_step);
6264 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6265
b17dc4d4
RB
6266 running_off = newoff;
6267 group_el = 0;
6268 }
225ce44b
RB
6269 if (g == group_size - 1
6270 && !slp)
f502d50e
MM
6271 {
6272 if (j == 0 && i == 0)
225ce44b
RB
6273 STMT_VINFO_VEC_STMT (stmt_info)
6274 = *vec_stmt = assign;
f502d50e
MM
6275 else
6276 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6277 prev_stmt_info = vinfo_for_stmt (assign);
6278 }
6279 }
f2e2a985 6280 }
f502d50e 6281 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6282 if (slp)
6283 break;
f2e2a985 6284 }
778dd3b6
RB
6285
6286 vec_oprnds.release ();
f2e2a985
MM
6287 return true;
6288 }
6289
8c681247 6290 auto_vec<tree> dr_chain (group_size);
9771b263 6291 oprnds.create (group_size);
ebfd146a 6292
720f5239 6293 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6294 gcc_assert (alignment_support_scheme);
272c6793
RS
6295 /* Targets with store-lane instructions must not require explicit
6296 realignment. */
2de001ee 6297 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
6298 || alignment_support_scheme == dr_aligned
6299 || alignment_support_scheme == dr_unaligned_supported);
6300
62da9e14
RS
6301 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6302 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6303 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6304
2de001ee 6305 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
6306 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6307 else
6308 aggr_type = vectype;
ebfd146a
IR
6309
6310 /* In case the vectorization factor (VF) is bigger than the number
6311 of elements that we can fit in a vectype (nunits), we have to generate
 6312 more than one vector stmt - i.e. - we need to "unroll" the
b8698a0f 6313 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6314 vect_get_vec_def_for_copy_stmt. */
6315
0d0293ac 6316 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6317
6318 S1: &base + 2 = x2
6319 S2: &base = x0
6320 S3: &base + 1 = x1
6321 S4: &base + 3 = x3
6322
6323 We create vectorized stores starting from base address (the access of the
6324 first stmt in the chain (S2 in the above example), when the last store stmt
6325 of the chain (S4) is reached:
6326
6327 VS1: &base = vx2
6328 VS2: &base + vec_size*1 = vx0
6329 VS3: &base + vec_size*2 = vx1
6330 VS4: &base + vec_size*3 = vx3
6331
6332 Then permutation statements are generated:
6333
3fcc1b55
JJ
6334 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6335 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6336 ...
b8698a0f 6337
ebfd146a
IR
6338 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6339 (the order of the data-refs in the output of vect_permute_store_chain
6340 corresponds to the order of scalar stmts in the interleaving chain - see
6341 the documentation of vect_permute_store_chain()).
6342
6343 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6344 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6345 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6346 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6347 */
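   /* A concrete instance of the interleaving above (element values purely
      illustrative): for a group of two stores and four-element vectors,
	vx0 = { a0, a1, a2, a3 }   defs of the first store in the chain
	vx1 = { b0, b1, b2, b3 }   defs of the second store
      vect_permute_store_chain emits
	VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 4, 1, 5 } >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 2, 6, 3, 7 } >
      giving { a0, b0, a1, b1 } and { a2, b2, a3, b3 }, i.e. the memory
      order of the scalar stores.  */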
6348
6349 prev_stmt_info = NULL;
6350 for (j = 0; j < ncopies; j++)
6351 {
ebfd146a
IR
6352
6353 if (j == 0)
6354 {
6355 if (slp)
6356 {
6357 /* Get vectorized arguments for SLP_NODE. */
d092494c 6358 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6359 NULL, slp_node);
ebfd146a 6360
9771b263 6361 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6362 }
6363 else
6364 {
b8698a0f
L
6365 /* For interleaved stores we collect vectorized defs for all the
6366 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6367 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6368 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6369
0d0293ac 6370 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6371 OPRNDS are of size 1. */
b8698a0f 6372 next_stmt = first_stmt;
ebfd146a
IR
6373 for (i = 0; i < group_size; i++)
6374 {
b8698a0f
L
6375 /* Since gaps are not supported for interleaved stores,
6376 GROUP_SIZE is the exact number of stmts in the chain.
 6377 Therefore, NEXT_STMT can't be NULL_TREE. In case
6378 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
6379 iteration of the loop will be executed. */
6380 gcc_assert (next_stmt
6381 && gimple_assign_single_p (next_stmt));
6382 op = gimple_assign_rhs1 (next_stmt);
6383
81c40241 6384 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6385 dr_chain.quick_push (vec_oprnd);
6386 oprnds.quick_push (vec_oprnd);
e14c1050 6387 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
6388 }
6389 }
6390
 6391 /* We should have caught mismatched types earlier. */
6392 gcc_assert (useless_type_conversion_p (vectype,
6393 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6394 bool simd_lane_access_p
6395 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6396 if (simd_lane_access_p
6397 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6398 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6399 && integer_zerop (DR_OFFSET (first_dr))
6400 && integer_zerop (DR_INIT (first_dr))
6401 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6402 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6403 {
6404 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6405 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6406 inv_p = false;
74bf76ed
JJ
6407 }
6408 else
6409 dataref_ptr
6410 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6411 simd_lane_access_p ? loop : NULL,
09dfa495 6412 offset, &dummy, gsi, &ptr_incr,
74bf76ed 6413 simd_lane_access_p, &inv_p);
a70d6342 6414 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6415 }
b8698a0f 6416 else
ebfd146a 6417 {
b8698a0f
L
6418 /* For interleaved stores we created vectorized defs for all the
6419 defs stored in OPRNDS in the previous iteration (previous copy).
6420 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6421 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6422 next copy.
0d0293ac 6423 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6424 OPRNDS are of size 1. */
6425 for (i = 0; i < group_size; i++)
6426 {
9771b263 6427 op = oprnds[i];
81c40241 6428 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 6429 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
6430 dr_chain[i] = vec_oprnd;
6431 oprnds[i] = vec_oprnd;
ebfd146a 6432 }
74bf76ed
JJ
6433 if (dataref_offset)
6434 dataref_offset
6435 = int_const_binop (PLUS_EXPR, dataref_offset,
6436 TYPE_SIZE_UNIT (aggr_type));
6437 else
6438 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6439 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
6440 }
6441
2de001ee 6442 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6443 {
272c6793 6444 tree vec_array;
267d3070 6445
272c6793
RS
6446 /* Combine all the vectors into an array. */
6447 vec_array = create_vector_array (vectype, vec_num);
6448 for (i = 0; i < vec_num; i++)
c2d7ab2a 6449 {
9771b263 6450 vec_oprnd = dr_chain[i];
272c6793 6451 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6452 }
b8698a0f 6453
272c6793
RS
6454 /* Emit:
6455 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
44fc7854 6456 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
a844293d
RS
6457 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6458 vec_array);
6459 gimple_call_set_lhs (call, data_ref);
6460 gimple_call_set_nothrow (call, true);
6461 new_stmt = call;
267d3070 6462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6463 }
6464 else
6465 {
6466 new_stmt = NULL;
0d0293ac 6467 if (grouped_store)
272c6793 6468 {
b6b9227d
JJ
6469 if (j == 0)
6470 result_chain.create (group_size);
272c6793
RS
6471 /* Permute. */
6472 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6473 &result_chain);
6474 }
c2d7ab2a 6475
272c6793
RS
6476 next_stmt = first_stmt;
6477 for (i = 0; i < vec_num; i++)
6478 {
644ffefd 6479 unsigned align, misalign;
272c6793
RS
6480
6481 if (i > 0)
6482 /* Bump the vector pointer. */
6483 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6484 stmt, NULL_TREE);
6485
6486 if (slp)
9771b263 6487 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
6488 else if (grouped_store)
6489 /* For grouped stores vectorized defs are interleaved in
272c6793 6490 vect_permute_store_chain(). */
9771b263 6491 vec_oprnd = result_chain[i];
272c6793 6492
69a2e8a1 6493 data_ref = fold_build2 (MEM_REF, vectype,
aed93b23
RB
6494 dataref_ptr,
6495 dataref_offset
6496 ? dataref_offset
44fc7854 6497 : build_int_cst (ref_type, 0));
f702e7d4 6498 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 6499 if (aligned_access_p (first_dr))
644ffefd 6500 misalign = 0;
272c6793
RS
6501 else if (DR_MISALIGNMENT (first_dr) == -1)
6502 {
25f68d90 6503 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 6504 misalign = 0;
272c6793
RS
6505 TREE_TYPE (data_ref)
6506 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 6507 align * BITS_PER_UNIT);
272c6793
RS
6508 }
6509 else
6510 {
6511 TREE_TYPE (data_ref)
6512 = build_aligned_type (TREE_TYPE (data_ref),
6513 TYPE_ALIGN (elem_type));
644ffefd 6514 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6515 }
aed93b23
RB
6516 if (dataref_offset == NULL_TREE
6517 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
6518 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6519 misalign);
c2d7ab2a 6520
62da9e14 6521 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6522 {
6523 tree perm_mask = perm_mask_for_reverse (vectype);
6524 tree perm_dest
6525 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6526 vectype);
b731b390 6527 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
6528
6529 /* Generate the permute statement. */
355fe088 6530 gimple *perm_stmt
0d0e4a03
JJ
6531 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6532 vec_oprnd, perm_mask);
09dfa495
BM
6533 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6534
6535 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6536 vec_oprnd = new_temp;
6537 }
6538
272c6793
RS
6539 /* Arguments are ready. Create the new vector stmt. */
6540 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6542
6543 if (slp)
6544 continue;
6545
e14c1050 6546 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
6547 if (!next_stmt)
6548 break;
6549 }
ebfd146a 6550 }
1da0876c
RS
6551 if (!slp)
6552 {
6553 if (j == 0)
6554 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6555 else
6556 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6557 prev_stmt_info = vinfo_for_stmt (new_stmt);
6558 }
ebfd146a
IR
6559 }
6560
9771b263
DN
6561 oprnds.release ();
6562 result_chain.release ();
6563 vec_oprnds.release ();
ebfd146a
IR
6564
6565 return true;
6566}
6567
557be5a8
AL
6568/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6569 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 6570 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 6571 vect_gen_perm_mask_checked. */
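/* For example (illustrative only): for a four-element VECTYPE and
   SEL = { 3, 2, 1, 0 }, the result is the VECTOR_CST { 3, 2, 1, 0 } with
   four ssizetype elements, suitable as the selector operand of a
   VEC_PERM_EXPR that reverses its input vector.  */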
a1e53f3f 6572
3fcc1b55 6573tree
4aae3cb3 6574vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 6575{
b00cb3bf 6576 tree mask_type;
a1e53f3f 6577
b00cb3bf
RS
6578 unsigned int nunits = sel.length ();
6579 gcc_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6580
6581 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 6582 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
6583}
6584
7ac7e286 6585/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 6586 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
6587
6588tree
4aae3cb3 6589vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 6590{
7ac7e286 6591 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
6592 return vect_gen_perm_mask_any (vectype, sel);
6593}
6594
aec7ae7d
JJ
 6595/* Given vector variables X and Y that were generated for the scalar
6596 STMT, generate instructions to permute the vector elements of X and Y
6597 using permutation mask MASK_VEC, insert them at *GSI and return the
6598 permuted vector variable. */
a1e53f3f
L
6599
6600static tree
355fe088 6601permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6602 gimple_stmt_iterator *gsi)
a1e53f3f
L
6603{
6604 tree vectype = TREE_TYPE (x);
aec7ae7d 6605 tree perm_dest, data_ref;
355fe088 6606 gimple *perm_stmt;
a1e53f3f 6607
acdcd61b 6608 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 6609 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
6610
6611 /* Generate the permute statement. */
0d0e4a03 6612 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
6613 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6614
6615 return data_ref;
6616}
6617
6b916b36
RB
6618/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6619 inserting them on the loop's preheader edge. Returns true if we
 6620 were successful in doing so (and thus STMT can then be moved),
6621 otherwise returns false. */
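/* For illustration (names invented): if STMT is  _3 = MEM[base_2]  and
   base_2 is defined inside LOOP by  base_2 = &a + 16  whose own operands
   are all defined outside the loop, the definition of base_2 is moved to
   the preheader edge, after which STMT itself can be hoisted by the
   caller.  */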
6622
6623static bool
355fe088 6624hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
6625{
6626 ssa_op_iter i;
6627 tree op;
6628 bool any = false;
6629
6630 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6631 {
355fe088 6632 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6633 if (!gimple_nop_p (def_stmt)
6634 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6635 {
6636 /* Make sure we don't need to recurse. While we could do
 6637 so in simple cases, when there are more complex use webs
6638 we don't have an easy way to preserve stmt order to fulfil
6639 dependencies within them. */
6640 tree op2;
6641 ssa_op_iter i2;
d1417442
JJ
6642 if (gimple_code (def_stmt) == GIMPLE_PHI)
6643 return false;
6b916b36
RB
6644 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6645 {
355fe088 6646 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
6647 if (!gimple_nop_p (def_stmt2)
6648 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6649 return false;
6650 }
6651 any = true;
6652 }
6653 }
6654
6655 if (!any)
6656 return true;
6657
6658 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6659 {
355fe088 6660 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6661 if (!gimple_nop_p (def_stmt)
6662 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6663 {
6664 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6665 gsi_remove (&gsi, false);
6666 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6667 }
6668 }
6669
6670 return true;
6671}
6672
ebfd146a
IR
6673/* vectorizable_load.
6674
b8698a0f
L
6675 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6676 can be vectorized.
6677 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6678 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6679 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6680
6681static bool
355fe088 6682vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6683 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
6684{
6685 tree scalar_dest;
6686 tree vec_dest = NULL;
6687 tree data_ref = NULL;
6688 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6689 stmt_vec_info prev_stmt_info;
ebfd146a 6690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6691 struct loop *loop = NULL;
ebfd146a 6692 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6693 bool nested_in_vect_loop = false;
c716e67f 6694 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6695 tree elem_type;
ebfd146a 6696 tree new_temp;
ef4bddc2 6697 machine_mode mode;
355fe088 6698 gimple *new_stmt = NULL;
ebfd146a
IR
6699 tree dummy;
6700 enum dr_alignment_support alignment_support_scheme;
6701 tree dataref_ptr = NULL_TREE;
74bf76ed 6702 tree dataref_offset = NULL_TREE;
355fe088 6703 gimple *ptr_incr = NULL;
ebfd146a 6704 int ncopies;
4d694b27
RS
6705 int i, j;
6706 unsigned int group_size;
6707 poly_uint64 group_gap_adj;
ebfd146a
IR
6708 tree msq = NULL_TREE, lsq;
6709 tree offset = NULL_TREE;
356bbc4c 6710 tree byte_offset = NULL_TREE;
ebfd146a 6711 tree realignment_token = NULL_TREE;
538dd0b7 6712 gphi *phi = NULL;
6e1aa848 6713 vec<tree> dr_chain = vNULL;
0d0293ac 6714 bool grouped_load = false;
355fe088 6715 gimple *first_stmt;
4f0a0218 6716 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
6717 bool inv_p;
6718 bool compute_in_loop = false;
6719 struct loop *at_loop;
6720 int vec_num;
6721 bool slp = (slp_node != NULL);
6722 bool slp_perm = false;
6723 enum tree_code code;
a70d6342 6724 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 6725 poly_uint64 vf;
272c6793 6726 tree aggr_type;
134c85ca 6727 gather_scatter_info gs_info;
310213d4 6728 vec_info *vinfo = stmt_info->vinfo;
44fc7854 6729 tree ref_type;
a70d6342 6730
465c8c19
JJ
6731 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6732 return false;
6733
66c16fd9
RB
6734 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6735 && ! vec_stmt)
465c8c19
JJ
6736 return false;
6737
6738 /* Is vectorizable load? */
6739 if (!is_gimple_assign (stmt))
6740 return false;
6741
6742 scalar_dest = gimple_assign_lhs (stmt);
6743 if (TREE_CODE (scalar_dest) != SSA_NAME)
6744 return false;
6745
6746 code = gimple_assign_rhs_code (stmt);
6747 if (code != ARRAY_REF
6748 && code != BIT_FIELD_REF
6749 && code != INDIRECT_REF
6750 && code != COMPONENT_REF
6751 && code != IMAGPART_EXPR
6752 && code != REALPART_EXPR
6753 && code != MEM_REF
6754 && TREE_CODE_CLASS (code) != tcc_declaration)
6755 return false;
6756
6757 if (!STMT_VINFO_DATA_REF (stmt_info))
6758 return false;
6759
6760 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 6761 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 6762
a70d6342
IR
6763 if (loop_vinfo)
6764 {
6765 loop = LOOP_VINFO_LOOP (loop_vinfo);
6766 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6767 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6768 }
6769 else
3533e503 6770 vf = 1;
ebfd146a
IR
6771
6772 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6774 case of SLP. */
fce57248 6775 if (slp)
ebfd146a
IR
6776 ncopies = 1;
6777 else
e8f142e2 6778 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
6779
6780 gcc_assert (ncopies >= 1);
6781
6782 /* FORNOW. This restriction should be relaxed. */
6783 if (nested_in_vect_loop && ncopies > 1)
6784 {
73fbfcad 6785 if (dump_enabled_p ())
78c60e3d 6786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6787 "multiple types in nested loop.\n");
ebfd146a
IR
6788 return false;
6789 }
6790
f2556b68
RB
6791 /* Invalidate assumptions made by dependence analysis when vectorization
6792 on the unrolled body effectively re-orders stmts. */
6793 if (ncopies > 1
6794 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6795 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6796 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6797 {
6798 if (dump_enabled_p ())
6799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6800 "cannot perform implicit CSE when unrolling "
6801 "with negative dependence distance\n");
6802 return false;
6803 }
6804
7b7b1813 6805 elem_type = TREE_TYPE (vectype);
947131ba 6806 mode = TYPE_MODE (vectype);
ebfd146a
IR
6807
6808 /* FORNOW. In some cases can vectorize even if data-type not supported
6809 (e.g. - data copies). */
947131ba 6810 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6811 {
73fbfcad 6812 if (dump_enabled_p ())
78c60e3d 6813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6814 "Aligned load, but unsupported type.\n");
ebfd146a
IR
6815 return false;
6816 }
6817
ebfd146a 6818 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6819 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6820 {
0d0293ac 6821 grouped_load = true;
ebfd146a 6822 /* FORNOW */
2de001ee
RS
6823 gcc_assert (!nested_in_vect_loop);
6824 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6825
e14c1050 6826 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 6827 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 6828
b1af7da6
RB
6829 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6830 slp_perm = true;
6831
f2556b68
RB
6832 /* Invalidate assumptions made by dependence analysis when vectorization
6833 on the unrolled body effectively re-orders stmts. */
6834 if (!PURE_SLP_STMT (stmt_info)
6835 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6836 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6837 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6838 {
6839 if (dump_enabled_p ())
6840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6841 "cannot perform implicit CSE when performing "
6842 "group loads with negative dependence distance\n");
6843 return false;
6844 }
96bb56b2
RB
6845
6846 /* Similarly when the stmt is a load that is both part of a SLP
6847 instance and a loop vectorized stmt via the same-dr mechanism
6848 we have to give up. */
6849 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6850 && (STMT_SLP_TYPE (stmt_info)
6851 != STMT_SLP_TYPE (vinfo_for_stmt
6852 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6853 {
6854 if (dump_enabled_p ())
6855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6856 "conflicting SLP types for CSEd load\n");
6857 return false;
6858 }
ebfd146a
IR
6859 }
6860
2de001ee 6861 vect_memory_access_type memory_access_type;
62da9e14 6862 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
2de001ee
RS
6863 &memory_access_type, &gs_info))
6864 return false;
a1e53f3f 6865
ebfd146a
IR
6866 if (!vec_stmt) /* transformation not required. */
6867 {
2de001ee
RS
6868 if (!slp)
6869 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 6870 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
6871 /* The SLP costs are calculated during SLP analysis. */
6872 if (!PURE_SLP_STMT (stmt_info))
2de001ee 6873 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 6874 NULL, NULL, NULL);
ebfd146a
IR
6875 return true;
6876 }
6877
2de001ee
RS
6878 if (!slp)
6879 gcc_assert (memory_access_type
6880 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6881
73fbfcad 6882 if (dump_enabled_p ())
78c60e3d 6883 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6884 "transform load. ncopies = %d\n", ncopies);
ebfd146a 6885
67b8dbac 6886 /* Transform. */
ebfd146a 6887
f702e7d4 6888 ensure_base_align (dr);
c716e67f 6889
2de001ee 6890 if (memory_access_type == VMAT_GATHER_SCATTER)
aec7ae7d
JJ
6891 {
6892 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 6893 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
aec7ae7d 6894 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6895 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
6896 edge pe = loop_preheader_edge (loop);
6897 gimple_seq seq;
6898 basic_block new_bb;
6899 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6900 poly_uint64 gather_off_nunits
6901 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
aec7ae7d 6902
4d694b27 6903 if (known_eq (nunits, gather_off_nunits))
aec7ae7d 6904 modifier = NONE;
4d694b27 6905 else if (known_eq (nunits * 2, gather_off_nunits))
aec7ae7d 6906 {
aec7ae7d
JJ
6907 modifier = WIDEN;
6908
4d694b27
RS
6909 /* Currently widening gathers are only supported for
6910 fixed-length vectors. */
6911 int count = gather_off_nunits.to_constant ();
6912 vec_perm_builder sel (count, count, 1);
6913 for (i = 0; i < count; ++i)
6914 sel.quick_push (i | (count / 2));
aec7ae7d 6915
4d694b27 6916 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6917 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6918 indices);
aec7ae7d 6919 }
4d694b27 6920 else if (known_eq (nunits, gather_off_nunits * 2))
aec7ae7d 6921 {
aec7ae7d
JJ
6922 modifier = NARROW;
6923
4d694b27
RS
6924 /* Currently narrowing gathers are only supported for
6925 fixed-length vectors. */
6926 int count = nunits.to_constant ();
6927 vec_perm_builder sel (count, count, 1);
6928 for (i = 0; i < count; ++i)
6929 sel.quick_push (i < count / 2 ? i : i + count / 2);
aec7ae7d 6930
4d694b27 6931 vec_perm_indices indices (sel, 2, count);
e3342de4 6932 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
aec7ae7d
JJ
6933 ncopies *= 2;
6934 }
6935 else
6936 gcc_unreachable ();
6937
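      /* Illustration (element counts assumed): with 4 data elements and 8
	 offset elements (WIDEN), the selector built above is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, applied on odd copies to expose the
	 upper half of the offset vector.  With 8 data elements and 4 offset
	 elements (NARROW), the selector is { 0, 1, 2, 3, 8, 9, 10, 11 },
	 used to concatenate the useful halves of two gather results.  */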
134c85ca 6938 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
aec7ae7d
JJ
6939 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6940 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6941 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6942 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6943 scaletype = TREE_VALUE (arglist);
d3c2fee0 6944 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
6945
6946 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6947
134c85ca 6948 ptr = fold_convert (ptrtype, gs_info.base);
aec7ae7d
JJ
6949 if (!is_gimple_min_invariant (ptr))
6950 {
6951 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6952 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6953 gcc_assert (!new_bb);
6954 }
6955
6956 /* Currently we support only unconditional gather loads,
6957 so mask should be all ones. */
d3c2fee0
AI
6958 if (TREE_CODE (masktype) == INTEGER_TYPE)
6959 mask = build_int_cst (masktype, -1);
6960 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6961 {
6962 mask = build_int_cst (TREE_TYPE (masktype), -1);
6963 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6964 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6965 }
aec7ae7d
JJ
6966 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6967 {
6968 REAL_VALUE_TYPE r;
6969 long tmp[6];
6970 for (j = 0; j < 6; ++j)
6971 tmp[j] = -1;
6972 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6973 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6974 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6975 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
6976 }
6977 else
6978 gcc_unreachable ();
aec7ae7d 6979
134c85ca 6980 scale = build_int_cst (scaletype, gs_info.scale);
aec7ae7d 6981
d3c2fee0
AI
6982 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6983 merge = build_int_cst (TREE_TYPE (rettype), 0);
6984 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6985 {
6986 REAL_VALUE_TYPE r;
6987 long tmp[6];
6988 for (j = 0; j < 6; ++j)
6989 tmp[j] = 0;
6990 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6991 merge = build_real (TREE_TYPE (rettype), r);
6992 }
6993 else
6994 gcc_unreachable ();
6995 merge = build_vector_from_val (rettype, merge);
6996 merge = vect_init_vector (stmt, merge, rettype, NULL);
6997
aec7ae7d
JJ
6998 prev_stmt_info = NULL;
6999 for (j = 0; j < ncopies; ++j)
7000 {
7001 if (modifier == WIDEN && (j & 1))
7002 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
7003 perm_mask, stmt, gsi);
7004 else if (j == 0)
7005 op = vec_oprnd0
134c85ca 7006 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
aec7ae7d
JJ
7007 else
7008 op = vec_oprnd0
134c85ca 7009 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
aec7ae7d
JJ
7010
7011 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7012 {
7013 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
7014 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 7015 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
aec7ae7d
JJ
7016 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7017 new_stmt
0d0e4a03 7018 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
7019 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7020 op = var;
7021 }
7022
7023 new_stmt
134c85ca 7024 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
7025
7026 if (!useless_type_conversion_p (vectype, rettype))
7027 {
7028 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
7029 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 7030 op = vect_get_new_ssa_name (rettype, vect_simple_var);
aec7ae7d
JJ
7031 gimple_call_set_lhs (new_stmt, op);
7032 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 7033 var = make_ssa_name (vec_dest);
aec7ae7d
JJ
7034 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
7035 new_stmt
0d0e4a03 7036 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
7037 }
7038 else
7039 {
7040 var = make_ssa_name (vec_dest, new_stmt);
7041 gimple_call_set_lhs (new_stmt, var);
7042 }
7043
7044 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7045
7046 if (modifier == NARROW)
7047 {
7048 if ((j & 1) == 0)
7049 {
7050 prev_res = var;
7051 continue;
7052 }
7053 var = permute_vec_elements (prev_res, var,
7054 perm_mask, stmt, gsi);
7055 new_stmt = SSA_NAME_DEF_STMT (var);
7056 }
7057
7058 if (prev_stmt_info == NULL)
7059 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7060 else
7061 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7062 prev_stmt_info = vinfo_for_stmt (new_stmt);
7063 }
7064 return true;
7065 }
2de001ee
RS
7066
7067 if (memory_access_type == VMAT_ELEMENTWISE
7068 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7069 {
7070 gimple_stmt_iterator incr_gsi;
7071 bool insert_after;
355fe088 7072 gimple *incr;
7d75abc8 7073 tree offvar;
7d75abc8
MM
7074 tree ivstep;
7075 tree running_off;
9771b263 7076 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 7077 gimple_seq stmts = NULL;
14ac6aa2 7078 tree stride_base, stride_step, alias_off;
4d694b27
RS
7079 /* Checked by get_load_store_type. */
7080 unsigned int const_nunits = nunits.to_constant ();
14ac6aa2
RB
7081
7082 gcc_assert (!nested_in_vect_loop);
7d75abc8 7083
f502d50e 7084 if (slp && grouped_load)
44fc7854
BE
7085 {
7086 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7087 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7088 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7089 ref_type = get_group_alias_ptr_type (first_stmt);
7090 }
ab313a8c 7091 else
44fc7854
BE
7092 {
7093 first_stmt = stmt;
7094 first_dr = dr;
7095 group_size = 1;
7096 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7097 }
ab313a8c 7098
14ac6aa2
RB
7099 stride_base
7100 = fold_build_pointer_plus
ab313a8c 7101 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7102 size_binop (PLUS_EXPR,
ab313a8c
RB
7103 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7104 convert_to_ptrofftype (DR_INIT (first_dr))));
7105 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7106
7107 /* For a load with loop-invariant (but other than power-of-2)
7108 stride (i.e. not a grouped access) like so:
7109
7110 for (i = 0; i < n; i += stride)
7111 ... = array[i];
7112
7113 we generate a new induction variable and new accesses to
7114 form a new vector (or vectors, depending on ncopies):
7115
7116 for (j = 0; ; j += VF*stride)
7117 tmp1 = array[j];
7118 tmp2 = array[j + stride];
7119 ...
7120 vectemp = {tmp1, tmp2, ...}
7121 */
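      /* For instance (illustrative values): with stride == 3, VF == 4 and
	 a four-element vectype, the induction variable advances by four
	 strides per vector iteration and each copy loads
	   tmp1 = array[j];      tmp2 = array[j + 3];
	   tmp3 = array[j + 6];  tmp4 = array[j + 9];
	   vectemp = { tmp1, tmp2, tmp3, tmp4 };  */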
7122
ab313a8c
RB
7123 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7124 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7125
7126 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7127
ab313a8c 7128 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7d75abc8
MM
7129 loop, &incr_gsi, insert_after,
7130 &offvar, NULL);
7131 incr = gsi_stmt (incr_gsi);
310213d4 7132 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7133
ab313a8c
RB
7134 stride_step = force_gimple_operand (unshare_expr (stride_step),
7135 &stmts, true, NULL_TREE);
7d75abc8
MM
7136 if (stmts)
7137 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7138
7139 prev_stmt_info = NULL;
7140 running_off = offvar;
44fc7854 7141 alias_off = build_int_cst (ref_type, 0);
4d694b27 7142 int nloads = const_nunits;
e09b4c37 7143 int lnel = 1;
7b5fc413 7144 tree ltype = TREE_TYPE (vectype);
ea60dd34 7145 tree lvectype = vectype;
b266b968 7146 auto_vec<tree> dr_chain;
2de001ee 7147 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7148 {
4d694b27 7149 if (group_size < const_nunits)
e09b4c37 7150 {
ff03930a
JJ
7151 /* First check if vec_init optab supports construction from
7152 vector elts directly. */
b397965c 7153 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7154 machine_mode vmode;
7155 if (mode_for_vector (elmode, group_size).exists (&vmode)
7156 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7157 && (convert_optab_handler (vec_init_optab,
7158 TYPE_MODE (vectype), vmode)
7159 != CODE_FOR_nothing))
ea60dd34 7160 {
4d694b27 7161 nloads = const_nunits / group_size;
ea60dd34 7162 lnel = group_size;
ff03930a
JJ
7163 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7164 }
7165 else
7166 {
7167 /* Otherwise avoid emitting a constructor of vector elements
7168 by performing the loads using an integer type of the same
7169 size, constructing a vector of those and then
7170 re-interpreting it as the original vector type.
7171 This avoids a huge runtime penalty due to the general
7172 inability to perform store forwarding from smaller stores
7173 to a larger load. */
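		  /* Worked example (types assumed for illustration): for a
		     V8HI vectype and group_size == 2, lsize is 32 bits, so
		     the loads are done as four SImode accesses collected
		     into a V4SI constructor, which is then VIEW_CONVERTed
		     back to V8HI, instead of eight HImode loads feeding a
		     V8HI constructor.  */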
7174 unsigned lsize
7175 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7176 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7177 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
 7178 /* If we can't construct such a vector, fall back to
7179 element loads of the original vector type. */
4d694b27 7180 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7181 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7182 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7183 != CODE_FOR_nothing))
7184 {
4d694b27 7185 nloads = lnunits;
ff03930a
JJ
7186 lnel = group_size;
7187 ltype = build_nonstandard_integer_type (lsize, 1);
7188 lvectype = build_vector_type (ltype, nloads);
7189 }
ea60dd34 7190 }
e09b4c37 7191 }
2de001ee 7192 else
e09b4c37 7193 {
ea60dd34 7194 nloads = 1;
4d694b27 7195 lnel = const_nunits;
e09b4c37 7196 ltype = vectype;
e09b4c37 7197 }
2de001ee
RS
7198 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7199 }
7200 if (slp)
7201 {
66c16fd9
RB
7202 /* For SLP permutation support we need to load the whole group,
7203 not only the number of vector stmts the permutation result
7204 fits in. */
b266b968 7205 if (slp_perm)
66c16fd9 7206 {
d9f21f6a
RS
7207 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7208 variable VF. */
7209 unsigned int const_vf = vf.to_constant ();
4d694b27 7210 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7211 dr_chain.create (ncopies);
7212 }
7213 else
7214 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7215 }
4d694b27 7216 unsigned int group_el = 0;
e09b4c37
RB
7217 unsigned HOST_WIDE_INT
7218 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7219 for (j = 0; j < ncopies; j++)
7220 {
7b5fc413 7221 if (nloads > 1)
e09b4c37
RB
7222 vec_alloc (v, nloads);
7223 for (i = 0; i < nloads; i++)
7b5fc413 7224 {
e09b4c37
RB
7225 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7226 group_el * elsz);
7227 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7228 build2 (MEM_REF, ltype,
7229 running_off, this_off));
7230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7231 if (nloads > 1)
7232 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7233 gimple_assign_lhs (new_stmt));
7234
7235 group_el += lnel;
7236 if (! slp
7237 || group_el == group_size)
7b5fc413 7238 {
e09b4c37
RB
7239 tree newoff = copy_ssa_name (running_off);
7240 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7241 running_off, stride_step);
7b5fc413
RB
7242 vect_finish_stmt_generation (stmt, incr, gsi);
7243
7244 running_off = newoff;
e09b4c37 7245 group_el = 0;
7b5fc413 7246 }
7b5fc413 7247 }
e09b4c37 7248 if (nloads > 1)
7d75abc8 7249 {
ea60dd34
RB
7250 tree vec_inv = build_constructor (lvectype, v);
7251 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7252 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7253 if (lvectype != vectype)
7254 {
7255 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7256 VIEW_CONVERT_EXPR,
7257 build1 (VIEW_CONVERT_EXPR,
7258 vectype, new_temp));
7259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7260 }
7d75abc8
MM
7261 }
7262
7b5fc413 7263 if (slp)
b266b968 7264 {
b266b968
RB
7265 if (slp_perm)
7266 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7267 else
7268 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7269 }
7d75abc8 7270 else
225ce44b
RB
7271 {
7272 if (j == 0)
7273 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7274 else
7275 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7276 prev_stmt_info = vinfo_for_stmt (new_stmt);
7277 }
7d75abc8 7278 }
b266b968 7279 if (slp_perm)
29afecdf
RB
7280 {
7281 unsigned n_perms;
7282 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7283 slp_node_instance, false, &n_perms);
7284 }
7d75abc8
MM
7285 return true;
7286 }
aec7ae7d 7287
0d0293ac 7288 if (grouped_load)
ebfd146a 7289 {
e14c1050 7290 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7291 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7292 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7293 without permutation. */
7294 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7295 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7296 /* For BB vectorization always use the first stmt to base
7297 the data ref pointer on. */
7298 if (bb_vinfo)
7299 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7300
ebfd146a 7301 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7302 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7303 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7304 ??? But we can only do so if there is exactly one
7305 as we have no way to get at the rest. Leave the CSE
7306 opportunity alone.
7307 ??? With the group load eventually participating
7308 in multiple different permutations (having multiple
7309 slp nodes which refer to the same group) the CSE
7310 is even wrong code. See PR56270. */
7311 && !slp)
ebfd146a
IR
7312 {
7313 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7314 return true;
7315 }
7316 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7317 group_gap_adj = 0;
ebfd146a
IR
7318
7319 /* VEC_NUM is the number of vect stmts to be created for this group. */
7320 if (slp)
7321 {
0d0293ac 7322 grouped_load = false;
91ff1504
RB
7323 /* For SLP permutation support we need to load the whole group,
7324 not only the number of vector stmts the permutation result
7325 fits in. */
7326 if (slp_perm)
b267968e 7327 {
d9f21f6a
RS
7328 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7329 variable VF. */
7330 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7331 unsigned int const_nunits = nunits.to_constant ();
7332 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7333 group_gap_adj = vf * group_size - nunits * vec_num;
7334 }
91ff1504 7335 else
b267968e
RB
7336 {
7337 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7338 group_gap_adj
7339 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7340 }
a70d6342 7341 }
ebfd146a 7342 else
9b999e8c 7343 vec_num = group_size;
44fc7854
BE
7344
7345 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7346 }
7347 else
7348 {
7349 first_stmt = stmt;
7350 first_dr = dr;
7351 group_size = vec_num = 1;
9b999e8c 7352 group_gap_adj = 0;
44fc7854 7353 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7354 }
7355
720f5239 7356 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7357 gcc_assert (alignment_support_scheme);
272c6793
RS
7358 /* Targets with load-lane instructions must not require explicit
7359 realignment. */
2de001ee 7360 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
7361 || alignment_support_scheme == dr_aligned
7362 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7363
7364 /* In case the vectorization factor (VF) is bigger than the number
7365 of elements that we can fit in a vectype (nunits), we have to generate
 7366 more than one vector stmt - i.e. - we need to "unroll" the
ff802fa1 7367 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7368 from one copy of the vector stmt to the next, in the field
ff802fa1 7369 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7370 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7371 stmts that use the defs of the current stmt. The example below
7372 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7373 need to create 4 vectorized stmts):
ebfd146a
IR
7374
7375 before vectorization:
7376 RELATED_STMT VEC_STMT
7377 S1: x = memref - -
7378 S2: z = x + 1 - -
7379
7380 step 1: vectorize stmt S1:
7381 We first create the vector stmt VS1_0, and, as usual, record a
7382 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7383 Next, we create the vector stmt VS1_1, and record a pointer to
7384 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7385 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7386 stmts and pointers:
7387 RELATED_STMT VEC_STMT
7388 VS1_0: vx0 = memref0 VS1_1 -
7389 VS1_1: vx1 = memref1 VS1_2 -
7390 VS1_2: vx2 = memref2 VS1_3 -
7391 VS1_3: vx3 = memref3 - -
7392 S1: x = load - VS1_0
7393 S2: z = x + 1 - -
7394
b8698a0f
L
7395 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7396 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
7397 stmt S2. */
7398
0d0293ac 7399 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7400
7401 S1: x2 = &base + 2
7402 S2: x0 = &base
7403 S3: x1 = &base + 1
7404 S4: x3 = &base + 3
7405
b8698a0f 7406 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7407 starting from the access of the first stmt of the chain:
7408
7409 VS1: vx0 = &base
7410 VS2: vx1 = &base + vec_size*1
7411 VS3: vx3 = &base + vec_size*2
7412 VS4: vx4 = &base + vec_size*3
7413
7414 Then permutation statements are generated:
7415
e2c83630
RH
7416 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7417 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7418 ...
7419
7420 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7421 (the order of the data-refs in the output of vect_permute_load_chain
7422 corresponds to the order of scalar stmts in the interleaving chain - see
7423 the documentation of vect_permute_load_chain()).
7424 The generation of permutation stmts and recording them in
0d0293ac 7425 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7426
b8698a0f 7427 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7428 permutation stmts above are created for every copy. The result vector
7429 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7430 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
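   /* A concrete instance (element values illustrative): for a group of two
      loads and four-element vectors,
	vx0 = { a0, b0, a1, b1 }    vx1 = { a2, b2, a3, b3 }
      the permutations
	VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >
      recover { a0, a1, a2, a3 } and { b0, b1, b2, b3 }, i.e. the values
      of the two scalar loads in the chain.  */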
ebfd146a
IR
7431
7432 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7433 on a target that supports unaligned accesses (dr_unaligned_supported)
7434 we generate the following code:
7435 p = initial_addr;
7436 indx = 0;
7437 loop {
7438 p = p + indx * vectype_size;
7439 vec_dest = *(p);
7440 indx = indx + 1;
7441 }
7442
7443 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7444 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7445 then generate the following code, in which the data in each iteration is
7446 obtained by two vector loads, one from the previous iteration, and one
7447 from the current iteration:
7448 p1 = initial_addr;
7449 msq_init = *(floor(p1))
7450 p2 = initial_addr + VS - 1;
7451 realignment_token = call target_builtin;
7452 indx = 0;
7453 loop {
7454 p2 = p2 + indx * vectype_size
7455 lsq = *(floor(p2))
7456 vec_dest = realign_load (msq, lsq, realignment_token)
7457 indx = indx + 1;
7458 msq = lsq;
7459 } */
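   /* Worked example (addresses assumed for illustration): for V4SI and an
      access starting at &a[1], msq_init loads a[0..3] from the aligned
      address floor(&a[1]); inside the loop lsq loads a[4..7], and
      REALIGN_LOAD combines msq and lsq using the realignment token to
      yield a[1..4]; msq is then set to lsq for the next iteration.  */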
7460
7461 /* If the misalignment remains the same throughout the execution of the
7462 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7463 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
7464 This can only occur when vectorizing memory accesses in the inner-loop
7465 nested within an outer-loop that is being vectorized. */
7466
d1e4b493 7467 if (nested_in_vect_loop
832b4117 7468 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
ebfd146a
IR
7469 {
7470 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7471 compute_in_loop = true;
7472 }
7473
7474 if ((alignment_support_scheme == dr_explicit_realign_optimized
7475 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7476 && !compute_in_loop)
ebfd146a
IR
7477 {
7478 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7479 alignment_support_scheme, NULL_TREE,
7480 &at_loop);
7481 if (alignment_support_scheme == dr_explicit_realign_optimized)
7482 {
538dd0b7 7483 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7484 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7485 size_one_node);
ebfd146a
IR
7486 }
7487 }
7488 else
7489 at_loop = loop;
7490
62da9e14 7491 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7492 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7493
2de001ee 7494 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
7495 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7496 else
7497 aggr_type = vectype;
7498
ebfd146a 7499 prev_stmt_info = NULL;
4d694b27 7500 poly_uint64 group_elt = 0;
ebfd146a 7501 for (j = 0; j < ncopies; j++)
b8698a0f 7502 {
272c6793 7503 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7504 if (j == 0)
74bf76ed
JJ
7505 {
7506 bool simd_lane_access_p
7507 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7508 if (simd_lane_access_p
7509 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7510 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7511 && integer_zerop (DR_OFFSET (first_dr))
7512 && integer_zerop (DR_INIT (first_dr))
7513 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7514 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
7515 && (alignment_support_scheme == dr_aligned
7516 || alignment_support_scheme == dr_unaligned_supported))
7517 {
7518 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7519 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7520 inv_p = false;
74bf76ed 7521 }
4f0a0218
RB
7522 else if (first_stmt_for_drptr
7523 && first_stmt != first_stmt_for_drptr)
7524 {
7525 dataref_ptr
7526 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7527 at_loop, offset, &dummy, gsi,
7528 &ptr_incr, simd_lane_access_p,
7529 &inv_p, byte_offset);
7530 /* Adjust the pointer by the difference to first_stmt. */
7531 data_reference_p ptrdr
7532 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7533 tree diff = fold_convert (sizetype,
7534 size_binop (MINUS_EXPR,
7535 DR_INIT (first_dr),
7536 DR_INIT (ptrdr)));
7537 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7538 stmt, diff);
7539 }
74bf76ed
JJ
7540 else
7541 dataref_ptr
7542 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7543 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
7544 simd_lane_access_p, &inv_p,
7545 byte_offset);
74bf76ed
JJ
7546 }
7547 else if (dataref_offset)
7548 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7549 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7550 else
272c6793
RS
7551 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7552 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7553
0d0293ac 7554 if (grouped_load || slp_perm)
9771b263 7555 dr_chain.create (vec_num);
5ce1ee7f 7556
2de001ee 7557 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7558 {
272c6793
RS
7559 tree vec_array;
7560
7561 vec_array = create_vector_array (vectype, vec_num);
7562
7563 /* Emit:
7564 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
44fc7854 7565 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
a844293d
RS
7566 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7567 data_ref);
7568 gimple_call_set_lhs (call, vec_array);
7569 gimple_call_set_nothrow (call, true);
7570 new_stmt = call;
272c6793 7571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7572
272c6793
RS
7573 /* Extract each vector into an SSA_NAME. */
7574 for (i = 0; i < vec_num; i++)
ebfd146a 7575 {
272c6793
RS
7576 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7577 vec_array, i);
9771b263 7578 dr_chain.quick_push (new_temp);
272c6793
RS
7579 }
7580
7581 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7582 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
7583 }
7584 else
7585 {
7586 for (i = 0; i < vec_num; i++)
7587 {
7588 if (i > 0)
7589 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7590 stmt, NULL_TREE);
7591
7592 /* 2. Create the vector-load in the loop. */
7593 switch (alignment_support_scheme)
7594 {
7595 case dr_aligned:
7596 case dr_unaligned_supported:
be1ac4ec 7597 {
644ffefd
MJ
7598 unsigned int align, misalign;
7599
272c6793 7600 data_ref
aed93b23
RB
7601 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7602 dataref_offset
7603 ? dataref_offset
44fc7854 7604 : build_int_cst (ref_type, 0));
f702e7d4 7605 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
7606 if (alignment_support_scheme == dr_aligned)
7607 {
7608 gcc_assert (aligned_access_p (first_dr));
644ffefd 7609 misalign = 0;
272c6793
RS
7610 }
7611 else if (DR_MISALIGNMENT (first_dr) == -1)
7612 {
25f68d90 7613 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7614 misalign = 0;
272c6793
RS
7615 TREE_TYPE (data_ref)
7616 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 7617 align * BITS_PER_UNIT);
272c6793
RS
7618 }
7619 else
7620 {
7621 TREE_TYPE (data_ref)
7622 = build_aligned_type (TREE_TYPE (data_ref),
7623 TYPE_ALIGN (elem_type));
644ffefd 7624 misalign = DR_MISALIGNMENT (first_dr);
272c6793 7625 }
aed93b23
RB
7626 if (dataref_offset == NULL_TREE
7627 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7628 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7629 align, misalign);
272c6793 7630 break;
be1ac4ec 7631 }
272c6793 7632 case dr_explicit_realign:
267d3070 7633 {
272c6793 7634 tree ptr, bump;
272c6793 7635
d88981fc 7636 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
7637
7638 if (compute_in_loop)
7639 msq = vect_setup_realignment (first_stmt, gsi,
7640 &realignment_token,
7641 dr_explicit_realign,
7642 dataref_ptr, NULL);
7643
aed93b23
RB
7644 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7645 ptr = copy_ssa_name (dataref_ptr);
7646 else
7647 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 7648 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
7649 new_stmt = gimple_build_assign
7650 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
7651 build_int_cst
7652 (TREE_TYPE (dataref_ptr),
f702e7d4 7653 -(HOST_WIDE_INT) align));
272c6793
RS
7654 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7655 data_ref
7656 = build2 (MEM_REF, vectype, ptr,
44fc7854 7657 build_int_cst (ref_type, 0));
272c6793
RS
7658 vec_dest = vect_create_destination_var (scalar_dest,
7659 vectype);
7660 new_stmt = gimple_build_assign (vec_dest, data_ref);
7661 new_temp = make_ssa_name (vec_dest, new_stmt);
7662 gimple_assign_set_lhs (new_stmt, new_temp);
7663 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7664 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7665 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7666 msq = new_temp;
7667
d88981fc 7668 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7669 TYPE_SIZE_UNIT (elem_type));
d88981fc 7670 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7671 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
7672 new_stmt = gimple_build_assign
7673 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 7674 build_int_cst
f702e7d4 7675 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 7676 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
7677 gimple_assign_set_lhs (new_stmt, ptr);
7678 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7679 data_ref
7680 = build2 (MEM_REF, vectype, ptr,
44fc7854 7681 build_int_cst (ref_type, 0));
272c6793 7682 break;
267d3070 7683 }
272c6793 7684 case dr_explicit_realign_optimized:
f702e7d4
RS
7685 {
7686 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7687 new_temp = copy_ssa_name (dataref_ptr);
7688 else
7689 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7690 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7691 new_stmt = gimple_build_assign
7692 (new_temp, BIT_AND_EXPR, dataref_ptr,
7693 build_int_cst (TREE_TYPE (dataref_ptr),
7694 -(HOST_WIDE_INT) align));
7695 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7696 data_ref
7697 = build2 (MEM_REF, vectype, new_temp,
7698 build_int_cst (ref_type, 0));
7699 break;
7700 }
272c6793
RS
7701 default:
7702 gcc_unreachable ();
7703 }
ebfd146a 7704 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 7705 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
7706 new_temp = make_ssa_name (vec_dest, new_stmt);
7707 gimple_assign_set_lhs (new_stmt, new_temp);
7708 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7709
272c6793
RS
7710 /* 3. Handle explicit realignment if necessary/supported.
7711 Create in loop:
7712 vec_dest = realign_load (msq, lsq, realignment_token) */
7713 if (alignment_support_scheme == dr_explicit_realign_optimized
7714 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7715 {
272c6793
RS
7716 lsq = gimple_assign_lhs (new_stmt);
7717 if (!realignment_token)
7718 realignment_token = dataref_ptr;
7719 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
7720 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7721 msq, lsq, realignment_token);
272c6793
RS
7722 new_temp = make_ssa_name (vec_dest, new_stmt);
7723 gimple_assign_set_lhs (new_stmt, new_temp);
7724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7725
7726 if (alignment_support_scheme == dr_explicit_realign_optimized)
7727 {
7728 gcc_assert (phi);
7729 if (i == vec_num - 1 && j == ncopies - 1)
7730 add_phi_arg (phi, lsq,
7731 loop_latch_edge (containing_loop),
9e227d60 7732 UNKNOWN_LOCATION);
272c6793
RS
7733 msq = lsq;
7734 }
ebfd146a 7735 }
ebfd146a 7736
59fd17e3
RB
7737 /* 4. Handle invariant-load. */
7738 if (inv_p && !bb_vinfo)
7739 {
59fd17e3 7740 gcc_assert (!grouped_load);
d1417442
JJ
7741 /* If we have versioned for aliasing or the loop doesn't
7742 have any data dependencies that would preclude this,
7743 then we are sure this is a loop invariant load and
7744 thus we can insert it on the preheader edge. */
7745 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7746 && !nested_in_vect_loop
6b916b36 7747 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
7748 {
7749 if (dump_enabled_p ())
7750 {
7751 dump_printf_loc (MSG_NOTE, vect_location,
7752 "hoisting out of the vectorized "
7753 "loop: ");
7754 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7755 }
b731b390 7756 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
7757 gsi_insert_on_edge_immediate
7758 (loop_preheader_edge (loop),
7759 gimple_build_assign (tem,
7760 unshare_expr
7761 (gimple_assign_rhs1 (stmt))));
7762 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
7763 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7764 set_vinfo_for_stmt (new_stmt,
7765 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
7766 }
7767 else
7768 {
7769 gimple_stmt_iterator gsi2 = *gsi;
7770 gsi_next (&gsi2);
7771 new_temp = vect_init_vector (stmt, scalar_dest,
7772 vectype, &gsi2);
34cd48e5 7773 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7774 }
59fd17e3
RB
7775 }
7776
62da9e14 7777 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 7778 {
aec7ae7d
JJ
7779 tree perm_mask = perm_mask_for_reverse (vectype);
7780 new_temp = permute_vec_elements (new_temp, new_temp,
7781 perm_mask, stmt, gsi);
ebfd146a
IR
7782 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7783 }
267d3070 7784
272c6793 7785 /* Collect vector loads and later create their permutation in
0d0293ac
MM
7786 vect_transform_grouped_load (). */
7787 if (grouped_load || slp_perm)
9771b263 7788 dr_chain.quick_push (new_temp);
267d3070 7789
272c6793
RS
7790 /* Store vector loads in the corresponding SLP_NODE. */
7791 if (slp && !slp_perm)
9771b263 7792 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
7793
 7794 /* With SLP permutation we load the gaps as well; without it
7795 we need to skip the gaps after we manage to fully load
7796 all elements. group_gap_adj is GROUP_SIZE here. */
7797 group_elt += nunits;
d9f21f6a
RS
7798 if (maybe_ne (group_gap_adj, 0U)
7799 && !slp_perm
7800 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 7801 {
d9f21f6a
RS
7802 poly_wide_int bump_val
7803 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7804 * group_gap_adj);
8e6cdc90 7805 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
7806 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7807 stmt, bump);
7808 group_elt = 0;
7809 }
272c6793 7810 }
9b999e8c
RB
7811 /* Bump the vector pointer to account for a gap or for excess
7812 elements loaded for a permuted SLP load. */
d9f21f6a 7813 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 7814 {
d9f21f6a
RS
7815 poly_wide_int bump_val
7816 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7817 * group_gap_adj);
8e6cdc90 7818 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
7819 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7820 stmt, bump);
7821 }
ebfd146a
IR
7822 }
7823
7824 if (slp && !slp_perm)
7825 continue;
7826
7827 if (slp_perm)
7828 {
29afecdf 7829 unsigned n_perms;
01d8bf07 7830 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
7831 slp_node_instance, false,
7832 &n_perms))
ebfd146a 7833 {
9771b263 7834 dr_chain.release ();
ebfd146a
IR
7835 return false;
7836 }
7837 }
7838 else
7839 {
0d0293ac 7840 if (grouped_load)
ebfd146a 7841 {
2de001ee 7842 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 7843 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7844 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
7845 }
7846 else
7847 {
7848 if (j == 0)
7849 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7850 else
7851 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7852 prev_stmt_info = vinfo_for_stmt (new_stmt);
7853 }
7854 }
9771b263 7855 dr_chain.release ();
ebfd146a
IR
7856 }
7857
ebfd146a
IR
7858 return true;
7859}
7860
7861/* Function vect_is_simple_cond.
b8698a0f 7862
ebfd146a
IR
7863 Input:
7864 LOOP - the loop that is being vectorized.
7865 COND - Condition that is checked for simple use.
7866
e9e1d143
RG
7867 Output:
7868 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 7869 *DTS - The def types for the arguments of the comparison
e9e1d143 7870
ebfd146a
IR
7871 Returns whether a COND can be vectorized. Checks whether
7872 condition operands are supportable using vec_is_simple_use. */
7873
87aab9b2 7874static bool
4fc5ebf1 7875vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
7876 tree *comp_vectype, enum vect_def_type *dts,
7877 tree vectype)
ebfd146a
IR
7878{
7879 tree lhs, rhs;
e9e1d143 7880 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7881
a414c77f
IE
7882 /* Mask case. */
7883 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 7884 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
7885 {
7886 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7887 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 7888 &dts[0], comp_vectype)
a414c77f
IE
7889 || !*comp_vectype
7890 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7891 return false;
7892 return true;
7893 }
7894
ebfd146a
IR
7895 if (!COMPARISON_CLASS_P (cond))
7896 return false;
7897
7898 lhs = TREE_OPERAND (cond, 0);
7899 rhs = TREE_OPERAND (cond, 1);
7900
7901 if (TREE_CODE (lhs) == SSA_NAME)
7902 {
355fe088 7903 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 7904 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
7905 return false;
7906 }
4fc5ebf1
JG
7907 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7908 || TREE_CODE (lhs) == FIXED_CST)
7909 dts[0] = vect_constant_def;
7910 else
ebfd146a
IR
7911 return false;
7912
7913 if (TREE_CODE (rhs) == SSA_NAME)
7914 {
355fe088 7915 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 7916 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
7917 return false;
7918 }
4fc5ebf1
JG
7919 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7920 || TREE_CODE (rhs) == FIXED_CST)
7921 dts[1] = vect_constant_def;
7922 else
ebfd146a
IR
7923 return false;
7924
28b33016
IE
7925 if (vectype1 && vectype2
7926 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7927 return false;
7928
e9e1d143 7929 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
7930 /* Invariant comparison. */
7931 if (! *comp_vectype)
7932 {
7933 tree scalar_type = TREE_TYPE (lhs);
7934 /* If we can widen the comparison to match vectype do so. */
7935 if (INTEGRAL_TYPE_P (scalar_type)
7936 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7937 TYPE_SIZE (TREE_TYPE (vectype))))
7938 scalar_type = build_nonstandard_integer_type
7939 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7940 TYPE_UNSIGNED (scalar_type));
7941 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7942 }
7943
ebfd146a
IR
7944 return true;
7945}
7946
7947/* vectorizable_condition.
7948
b8698a0f
L
7949 Check if STMT is conditional modify expression that can be vectorized.
7950 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7951 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
7952 at GSI.
7953
7954 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7955 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 7956 else clause if it is 2).
ebfd146a
IR
7957
7958 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7959
4bbe8262 7960bool
355fe088
TS
7961vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7962 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 7963 slp_tree slp_node)
ebfd146a
IR
7964{
7965 tree scalar_dest = NULL_TREE;
7966 tree vec_dest = NULL_TREE;
01216d27
JJ
7967 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7968 tree then_clause, else_clause;
ebfd146a 7969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 7970 tree comp_vectype = NULL_TREE;
ff802fa1
IR
7971 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7972 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 7973 tree vec_compare;
ebfd146a
IR
7974 tree new_temp;
7975 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
7976 enum vect_def_type dts[4]
7977 = {vect_unknown_def_type, vect_unknown_def_type,
7978 vect_unknown_def_type, vect_unknown_def_type};
7979 int ndts = 4;
f7e531cf 7980 int ncopies;
01216d27 7981 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 7982 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
7983 int i, j;
7984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
7985 vec<tree> vec_oprnds0 = vNULL;
7986 vec<tree> vec_oprnds1 = vNULL;
7987 vec<tree> vec_oprnds2 = vNULL;
7988 vec<tree> vec_oprnds3 = vNULL;
74946978 7989 tree vec_cmp_type;
a414c77f 7990 bool masked = false;
b8698a0f 7991
f7e531cf
IR
7992 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7993 return false;
7994
af29617a
AH
7995 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7996 {
7997 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7998 return false;
ebfd146a 7999
af29617a
AH
8000 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8001 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8002 && reduc_def))
8003 return false;
ebfd146a 8004
af29617a
AH
8005 /* FORNOW: not yet supported. */
8006 if (STMT_VINFO_LIVE_P (stmt_info))
8007 {
8008 if (dump_enabled_p ())
8009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8010 "value used after loop.\n");
8011 return false;
8012 }
ebfd146a
IR
8013 }
8014
8015 /* Is vectorizable conditional operation? */
8016 if (!is_gimple_assign (stmt))
8017 return false;
8018
8019 code = gimple_assign_rhs_code (stmt);
8020
8021 if (code != COND_EXPR)
8022 return false;
8023
465c8c19 8024 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8025 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8026
fce57248 8027 if (slp_node)
465c8c19
JJ
8028 ncopies = 1;
8029 else
e8f142e2 8030 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8031
8032 gcc_assert (ncopies >= 1);
8033 if (reduc_index && ncopies > 1)
8034 return false; /* FORNOW */
8035
4e71066d
RG
8036 cond_expr = gimple_assign_rhs1 (stmt);
8037 then_clause = gimple_assign_rhs2 (stmt);
8038 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8039
4fc5ebf1 8040 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 8041 &comp_vectype, &dts[0], vectype)
e9e1d143 8042 || !comp_vectype)
ebfd146a
IR
8043 return false;
8044
81c40241 8045 gimple *def_stmt;
4fc5ebf1 8046 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8047 &vectype1))
8048 return false;
4fc5ebf1 8049 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8050 &vectype2))
ebfd146a 8051 return false;
2947d3b2
IE
8052
8053 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8054 return false;
8055
8056 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8057 return false;
8058
28b33016
IE
8059 masked = !COMPARISON_CLASS_P (cond_expr);
8060 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8061
74946978
MP
8062 if (vec_cmp_type == NULL_TREE)
8063 return false;
784fb9b3 8064
01216d27
JJ
8065 cond_code = TREE_CODE (cond_expr);
8066 if (!masked)
8067 {
8068 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8069 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8070 }
8071
8072 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8073 {
8074 /* Boolean values may have another representation in vectors
8075 and therefore we prefer bit operations over comparison for
8076 them (which also works for scalar masks). We store opcodes
8077 to use in bitop1 and bitop2. Statement is vectorized as
8078 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8079 depending on bitop1 and bitop2 arity. */
8080 switch (cond_code)
8081 {
8082 case GT_EXPR:
8083 bitop1 = BIT_NOT_EXPR;
8084 bitop2 = BIT_AND_EXPR;
8085 break;
8086 case GE_EXPR:
8087 bitop1 = BIT_NOT_EXPR;
8088 bitop2 = BIT_IOR_EXPR;
8089 break;
8090 case LT_EXPR:
8091 bitop1 = BIT_NOT_EXPR;
8092 bitop2 = BIT_AND_EXPR;
8093 std::swap (cond_expr0, cond_expr1);
8094 break;
8095 case LE_EXPR:
8096 bitop1 = BIT_NOT_EXPR;
8097 bitop2 = BIT_IOR_EXPR;
8098 std::swap (cond_expr0, cond_expr1);
8099 break;
8100 case NE_EXPR:
8101 bitop1 = BIT_XOR_EXPR;
8102 break;
8103 case EQ_EXPR:
8104 bitop1 = BIT_XOR_EXPR;
8105 bitop2 = BIT_NOT_EXPR;
8106 break;
8107 default:
8108 return false;
8109 }
8110 cond_code = SSA_NAME;
8111 }
8112
b8698a0f 8113 if (!vec_stmt)
ebfd146a
IR
8114 {
8115 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8116 if (bitop1 != NOP_EXPR)
8117 {
8118 machine_mode mode = TYPE_MODE (comp_vectype);
8119 optab optab;
8120
8121 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8122 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8123 return false;
8124
8125 if (bitop2 != NOP_EXPR)
8126 {
8127 optab = optab_for_tree_code (bitop2, comp_vectype,
8128 optab_default);
8129 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8130 return false;
8131 }
8132 }
4fc5ebf1
JG
8133 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8134 cond_code))
8135 {
8136 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8137 return true;
8138 }
8139 return false;
ebfd146a
IR
8140 }
8141
f7e531cf
IR
8142 /* Transform. */
8143
8144 if (!slp_node)
8145 {
9771b263
DN
8146 vec_oprnds0.create (1);
8147 vec_oprnds1.create (1);
8148 vec_oprnds2.create (1);
8149 vec_oprnds3.create (1);
f7e531cf 8150 }
ebfd146a
IR
8151
8152 /* Handle def. */
8153 scalar_dest = gimple_assign_lhs (stmt);
8154 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8155
8156 /* Handle cond expr. */
a855b1b1
MM
8157 for (j = 0; j < ncopies; j++)
8158 {
538dd0b7 8159 gassign *new_stmt = NULL;
a855b1b1
MM
8160 if (j == 0)
8161 {
f7e531cf
IR
8162 if (slp_node)
8163 {
00f96dc9
TS
8164 auto_vec<tree, 4> ops;
8165 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8166
a414c77f 8167 if (masked)
01216d27 8168 ops.safe_push (cond_expr);
a414c77f
IE
8169 else
8170 {
01216d27
JJ
8171 ops.safe_push (cond_expr0);
8172 ops.safe_push (cond_expr1);
a414c77f 8173 }
9771b263
DN
8174 ops.safe_push (then_clause);
8175 ops.safe_push (else_clause);
306b0c92 8176 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8177 vec_oprnds3 = vec_defs.pop ();
8178 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8179 if (!masked)
8180 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8181 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8182 }
8183 else
8184 {
355fe088 8185 gimple *gtemp;
a414c77f
IE
8186 if (masked)
8187 {
8188 vec_cond_lhs
8189 = vect_get_vec_def_for_operand (cond_expr, stmt,
8190 comp_vectype);
8191 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8192 &gtemp, &dts[0]);
8193 }
8194 else
8195 {
01216d27
JJ
8196 vec_cond_lhs
8197 = vect_get_vec_def_for_operand (cond_expr0,
8198 stmt, comp_vectype);
8199 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8200
8201 vec_cond_rhs
8202 = vect_get_vec_def_for_operand (cond_expr1,
8203 stmt, comp_vectype);
8204 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8205 }
f7e531cf
IR
8206 if (reduc_index == 1)
8207 vec_then_clause = reduc_def;
8208 else
8209 {
8210 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8211 stmt);
8212 vect_is_simple_use (then_clause, loop_vinfo,
8213 &gtemp, &dts[2]);
f7e531cf
IR
8214 }
8215 if (reduc_index == 2)
8216 vec_else_clause = reduc_def;
8217 else
8218 {
8219 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8220 stmt);
8221 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8222 }
a855b1b1
MM
8223 }
8224 }
8225 else
8226 {
a414c77f
IE
8227 vec_cond_lhs
8228 = vect_get_vec_def_for_stmt_copy (dts[0],
8229 vec_oprnds0.pop ());
8230 if (!masked)
8231 vec_cond_rhs
8232 = vect_get_vec_def_for_stmt_copy (dts[1],
8233 vec_oprnds1.pop ());
8234
a855b1b1 8235 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8236 vec_oprnds2.pop ());
a855b1b1 8237 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8238 vec_oprnds3.pop ());
f7e531cf
IR
8239 }
8240
8241 if (!slp_node)
8242 {
9771b263 8243 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8244 if (!masked)
8245 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8246 vec_oprnds2.quick_push (vec_then_clause);
8247 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8248 }
8249
9dc3f7de 8250 /* Arguments are ready. Create the new vector stmt. */
9771b263 8251 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8252 {
9771b263
DN
8253 vec_then_clause = vec_oprnds2[i];
8254 vec_else_clause = vec_oprnds3[i];
a855b1b1 8255
a414c77f
IE
8256 if (masked)
8257 vec_compare = vec_cond_lhs;
8258 else
8259 {
8260 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8261 if (bitop1 == NOP_EXPR)
8262 vec_compare = build2 (cond_code, vec_cmp_type,
8263 vec_cond_lhs, vec_cond_rhs);
8264 else
8265 {
8266 new_temp = make_ssa_name (vec_cmp_type);
8267 if (bitop1 == BIT_NOT_EXPR)
8268 new_stmt = gimple_build_assign (new_temp, bitop1,
8269 vec_cond_rhs);
8270 else
8271 new_stmt
8272 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8273 vec_cond_rhs);
8274 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8275 if (bitop2 == NOP_EXPR)
8276 vec_compare = new_temp;
8277 else if (bitop2 == BIT_NOT_EXPR)
8278 {
8279 /* Instead of doing ~x ? y : z do x ? z : y. */
8280 vec_compare = new_temp;
8281 std::swap (vec_then_clause, vec_else_clause);
8282 }
8283 else
8284 {
8285 vec_compare = make_ssa_name (vec_cmp_type);
8286 new_stmt
8287 = gimple_build_assign (vec_compare, bitop2,
8288 vec_cond_lhs, new_temp);
8289 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8290 }
8291 }
a414c77f 8292 }
5958f9e2
JJ
8293 new_temp = make_ssa_name (vec_dest);
8294 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8295 vec_compare, vec_then_clause,
8296 vec_else_clause);
f7e531cf
IR
8297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8298 if (slp_node)
9771b263 8299 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8300 }
8301
8302 if (slp_node)
8303 continue;
8304
8305 if (j == 0)
8306 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8307 else
8308 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8309
8310 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8311 }
b8698a0f 8312
9771b263
DN
8313 vec_oprnds0.release ();
8314 vec_oprnds1.release ();
8315 vec_oprnds2.release ();
8316 vec_oprnds3.release ();
f7e531cf 8317
ebfd146a
IR
8318 return true;
8319}
8320
42fd8198
IE
8321/* vectorizable_comparison.
8322
8323 Check if STMT is comparison expression that can be vectorized.
8324 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8325 comparison, put it in VEC_STMT, and insert it at GSI.
8326
8327 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8328
fce57248 8329static bool
42fd8198
IE
8330vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8331 gimple **vec_stmt, tree reduc_def,
8332 slp_tree slp_node)
8333{
8334 tree lhs, rhs1, rhs2;
8335 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8336 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8337 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8338 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8339 tree new_temp;
8340 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8341 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8342 int ndts = 2;
42fd8198
IE
8343 unsigned nunits;
8344 int ncopies;
49e76ff1 8345 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8346 stmt_vec_info prev_stmt_info = NULL;
8347 int i, j;
8348 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8349 vec<tree> vec_oprnds0 = vNULL;
8350 vec<tree> vec_oprnds1 = vNULL;
8351 gimple *def_stmt;
8352 tree mask_type;
8353 tree mask;
8354
c245362b
IE
8355 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8356 return false;
8357
30480bcd 8358 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8359 return false;
8360
8361 mask_type = vectype;
8362 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8363
fce57248 8364 if (slp_node)
42fd8198
IE
8365 ncopies = 1;
8366 else
e8f142e2 8367 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8368
8369 gcc_assert (ncopies >= 1);
42fd8198
IE
8370 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8371 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8372 && reduc_def))
8373 return false;
8374
8375 if (STMT_VINFO_LIVE_P (stmt_info))
8376 {
8377 if (dump_enabled_p ())
8378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8379 "value used after loop.\n");
8380 return false;
8381 }
8382
8383 if (!is_gimple_assign (stmt))
8384 return false;
8385
8386 code = gimple_assign_rhs_code (stmt);
8387
8388 if (TREE_CODE_CLASS (code) != tcc_comparison)
8389 return false;
8390
8391 rhs1 = gimple_assign_rhs1 (stmt);
8392 rhs2 = gimple_assign_rhs2 (stmt);
8393
8394 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8395 &dts[0], &vectype1))
8396 return false;
8397
8398 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8399 &dts[1], &vectype2))
8400 return false;
8401
8402 if (vectype1 && vectype2
8403 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8404 return false;
8405
8406 vectype = vectype1 ? vectype1 : vectype2;
8407
8408 /* Invariant comparison. */
8409 if (!vectype)
8410 {
69a9a66f
RB
8411 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8412 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
42fd8198
IE
8413 return false;
8414 }
8415 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8416 return false;
8417
49e76ff1
IE
8418 /* Can't compare mask and non-mask types. */
8419 if (vectype1 && vectype2
8420 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8421 return false;
8422
8423 /* Boolean values may have another representation in vectors
8424 and therefore we prefer bit operations over comparison for
8425 them (which also works for scalar masks). We store opcodes
8426 to use in bitop1 and bitop2. Statement is vectorized as
8427 BITOP2 (rhs1 BITOP1 rhs2) or
8428 rhs1 BITOP2 (BITOP1 rhs2)
8429 depending on bitop1 and bitop2 arity. */
8430 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8431 {
8432 if (code == GT_EXPR)
8433 {
8434 bitop1 = BIT_NOT_EXPR;
8435 bitop2 = BIT_AND_EXPR;
8436 }
8437 else if (code == GE_EXPR)
8438 {
8439 bitop1 = BIT_NOT_EXPR;
8440 bitop2 = BIT_IOR_EXPR;
8441 }
8442 else if (code == LT_EXPR)
8443 {
8444 bitop1 = BIT_NOT_EXPR;
8445 bitop2 = BIT_AND_EXPR;
8446 std::swap (rhs1, rhs2);
264d951a 8447 std::swap (dts[0], dts[1]);
49e76ff1
IE
8448 }
8449 else if (code == LE_EXPR)
8450 {
8451 bitop1 = BIT_NOT_EXPR;
8452 bitop2 = BIT_IOR_EXPR;
8453 std::swap (rhs1, rhs2);
264d951a 8454 std::swap (dts[0], dts[1]);
49e76ff1
IE
8455 }
8456 else
8457 {
8458 bitop1 = BIT_XOR_EXPR;
8459 if (code == EQ_EXPR)
8460 bitop2 = BIT_NOT_EXPR;
8461 }
8462 }
8463
42fd8198
IE
8464 if (!vec_stmt)
8465 {
8466 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
49e76ff1 8467 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
4fc5ebf1 8468 dts, ndts, NULL, NULL);
49e76ff1 8469 if (bitop1 == NOP_EXPR)
96592eed 8470 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
8471 else
8472 {
8473 machine_mode mode = TYPE_MODE (vectype);
8474 optab optab;
8475
8476 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8477 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8478 return false;
8479
8480 if (bitop2 != NOP_EXPR)
8481 {
8482 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8483 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8484 return false;
8485 }
8486 return true;
8487 }
42fd8198
IE
8488 }
8489
8490 /* Transform. */
8491 if (!slp_node)
8492 {
8493 vec_oprnds0.create (1);
8494 vec_oprnds1.create (1);
8495 }
8496
8497 /* Handle def. */
8498 lhs = gimple_assign_lhs (stmt);
8499 mask = vect_create_destination_var (lhs, mask_type);
8500
8501 /* Handle cmp expr. */
8502 for (j = 0; j < ncopies; j++)
8503 {
8504 gassign *new_stmt = NULL;
8505 if (j == 0)
8506 {
8507 if (slp_node)
8508 {
8509 auto_vec<tree, 2> ops;
8510 auto_vec<vec<tree>, 2> vec_defs;
8511
8512 ops.safe_push (rhs1);
8513 ops.safe_push (rhs2);
306b0c92 8514 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
8515 vec_oprnds1 = vec_defs.pop ();
8516 vec_oprnds0 = vec_defs.pop ();
8517 }
8518 else
8519 {
e4af0bc4
IE
8520 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8521 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
8522 }
8523 }
8524 else
8525 {
8526 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8527 vec_oprnds0.pop ());
8528 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8529 vec_oprnds1.pop ());
8530 }
8531
8532 if (!slp_node)
8533 {
8534 vec_oprnds0.quick_push (vec_rhs1);
8535 vec_oprnds1.quick_push (vec_rhs2);
8536 }
8537
8538 /* Arguments are ready. Create the new vector stmt. */
8539 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8540 {
8541 vec_rhs2 = vec_oprnds1[i];
8542
8543 new_temp = make_ssa_name (mask);
49e76ff1
IE
8544 if (bitop1 == NOP_EXPR)
8545 {
8546 new_stmt = gimple_build_assign (new_temp, code,
8547 vec_rhs1, vec_rhs2);
8548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8549 }
8550 else
8551 {
8552 if (bitop1 == BIT_NOT_EXPR)
8553 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8554 else
8555 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8556 vec_rhs2);
8557 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8558 if (bitop2 != NOP_EXPR)
8559 {
8560 tree res = make_ssa_name (mask);
8561 if (bitop2 == BIT_NOT_EXPR)
8562 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8563 else
8564 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8565 new_temp);
8566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8567 }
8568 }
42fd8198
IE
8569 if (slp_node)
8570 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8571 }
8572
8573 if (slp_node)
8574 continue;
8575
8576 if (j == 0)
8577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8578 else
8579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8580
8581 prev_stmt_info = vinfo_for_stmt (new_stmt);
8582 }
8583
8584 vec_oprnds0.release ();
8585 vec_oprnds1.release ();
8586
8587 return true;
8588}
ebfd146a 8589
68a0f2ff
RS
8590/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8591 can handle all live statements in the node. Otherwise return true
8592 if STMT is not live or if vectorizable_live_operation can handle it.
8593 GSI and VEC_STMT are as for vectorizable_live_operation. */
8594
8595static bool
8596can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8597 slp_tree slp_node, gimple **vec_stmt)
8598{
8599 if (slp_node)
8600 {
8601 gimple *slp_stmt;
8602 unsigned int i;
8603 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8604 {
8605 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8606 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8607 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8608 vec_stmt))
8609 return false;
8610 }
8611 }
8612 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8613 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8614 return false;
8615
8616 return true;
8617}
8618
8644a673 8619/* Make sure the statement is vectorizable. */
ebfd146a
IR
8620
8621bool
891ad31c
RB
8622vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8623 slp_instance node_instance)
ebfd146a 8624{
8644a673 8625 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 8626 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 8627 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 8628 bool ok;
355fe088 8629 gimple *pattern_stmt;
363477c0 8630 gimple_seq pattern_def_seq;
ebfd146a 8631
73fbfcad 8632 if (dump_enabled_p ())
ebfd146a 8633 {
78c60e3d
SS
8634 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 8636 }
ebfd146a 8637
1825a1f3 8638 if (gimple_has_volatile_ops (stmt))
b8698a0f 8639 {
73fbfcad 8640 if (dump_enabled_p ())
78c60e3d 8641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8642 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
8643
8644 return false;
8645 }
b8698a0f
L
8646
8647 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
8648 to include:
8649 - the COND_EXPR which is the loop exit condition
8650 - any LABEL_EXPRs in the loop
b8698a0f 8651 - computations that are used only for array indexing or loop control.
8644a673 8652 In basic blocks we only analyze statements that are a part of some SLP
83197f37 8653 instance, therefore, all the statements are relevant.
ebfd146a 8654
d092494c 8655 Pattern statement needs to be analyzed instead of the original statement
83197f37 8656 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
8657 statements. In basic blocks we are called from some SLP instance
8658 traversal, don't analyze pattern stmts instead, the pattern stmts
8659 already will be part of SLP instance. */
83197f37
IR
8660
8661 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 8662 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 8663 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 8664 {
9d5e7640 8665 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 8666 && pattern_stmt
9d5e7640
IR
8667 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8668 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8669 {
83197f37 8670 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
8671 stmt = pattern_stmt;
8672 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 8673 if (dump_enabled_p ())
9d5e7640 8674 {
78c60e3d
SS
8675 dump_printf_loc (MSG_NOTE, vect_location,
8676 "==> examining pattern statement: ");
8677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
8678 }
8679 }
8680 else
8681 {
73fbfcad 8682 if (dump_enabled_p ())
e645e942 8683 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 8684
9d5e7640
IR
8685 return true;
8686 }
8644a673 8687 }
83197f37 8688 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 8689 && node == NULL
83197f37
IR
8690 && pattern_stmt
8691 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8692 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8693 {
8694 /* Analyze PATTERN_STMT too. */
73fbfcad 8695 if (dump_enabled_p ())
83197f37 8696 {
78c60e3d
SS
8697 dump_printf_loc (MSG_NOTE, vect_location,
8698 "==> examining pattern statement: ");
8699 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
8700 }
8701
891ad31c
RB
8702 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8703 node_instance))
83197f37
IR
8704 return false;
8705 }
ebfd146a 8706
1107f3ae 8707 if (is_pattern_stmt_p (stmt_info)
079c527f 8708 && node == NULL
363477c0 8709 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8710 {
363477c0 8711 gimple_stmt_iterator si;
1107f3ae 8712
363477c0
JJ
8713 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8714 {
355fe088 8715 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
8716 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8717 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8718 {
8719 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8720 if (dump_enabled_p ())
363477c0 8721 {
78c60e3d
SS
8722 dump_printf_loc (MSG_NOTE, vect_location,
8723 "==> examining pattern def statement: ");
8724 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8725 }
1107f3ae 8726
363477c0 8727 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 8728 need_to_vectorize, node, node_instance))
363477c0
JJ
8729 return false;
8730 }
8731 }
8732 }
1107f3ae 8733
8644a673
IR
8734 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8735 {
8736 case vect_internal_def:
8737 break;
ebfd146a 8738
8644a673 8739 case vect_reduction_def:
7c5222ff 8740 case vect_nested_cycle:
14a61437
RB
8741 gcc_assert (!bb_vinfo
8742 && (relevance == vect_used_in_outer
8743 || relevance == vect_used_in_outer_by_reduction
8744 || relevance == vect_used_by_reduction
b28ead45
AH
8745 || relevance == vect_unused_in_scope
8746 || relevance == vect_used_only_live));
8644a673
IR
8747 break;
8748
8749 case vect_induction_def:
e7baeb39
RB
8750 gcc_assert (!bb_vinfo);
8751 break;
8752
8644a673
IR
8753 case vect_constant_def:
8754 case vect_external_def:
8755 case vect_unknown_def_type:
8756 default:
8757 gcc_unreachable ();
8758 }
ebfd146a 8759
8644a673 8760 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 8761 {
8644a673 8762 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
8763 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8764 || (is_gimple_call (stmt)
8765 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 8766 *need_to_vectorize = true;
ebfd146a
IR
8767 }
8768
b1af7da6
RB
8769 if (PURE_SLP_STMT (stmt_info) && !node)
8770 {
8771 dump_printf_loc (MSG_NOTE, vect_location,
8772 "handled only by SLP analysis\n");
8773 return true;
8774 }
8775
8776 ok = true;
8777 if (!bb_vinfo
8778 && (STMT_VINFO_RELEVANT_P (stmt_info)
8779 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8780 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8781 || vectorizable_conversion (stmt, NULL, NULL, node)
8782 || vectorizable_shift (stmt, NULL, NULL, node)
8783 || vectorizable_operation (stmt, NULL, NULL, node)
8784 || vectorizable_assignment (stmt, NULL, NULL, node)
8785 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8786 || vectorizable_call (stmt, NULL, NULL, node)
8787 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 8788 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 8789 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
8790 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8791 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
8792 else
8793 {
8794 if (bb_vinfo)
8795 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8796 || vectorizable_conversion (stmt, NULL, NULL, node)
8797 || vectorizable_shift (stmt, NULL, NULL, node)
8798 || vectorizable_operation (stmt, NULL, NULL, node)
8799 || vectorizable_assignment (stmt, NULL, NULL, node)
8800 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8801 || vectorizable_call (stmt, NULL, NULL, node)
8802 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
8803 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8804 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 8805 }
8644a673
IR
8806
8807 if (!ok)
ebfd146a 8808 {
73fbfcad 8809 if (dump_enabled_p ())
8644a673 8810 {
78c60e3d
SS
8811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8812 "not vectorized: relevant stmt not ");
8813 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8814 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8815 }
b8698a0f 8816
ebfd146a
IR
8817 return false;
8818 }
8819
a70d6342
IR
8820 if (bb_vinfo)
8821 return true;
8822
8644a673
IR
8823 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8824 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
8825 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8826 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 8827 {
73fbfcad 8828 if (dump_enabled_p ())
8644a673 8829 {
78c60e3d 8830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 8831 "not vectorized: live stmt not supported: ");
78c60e3d 8832 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8833 }
b8698a0f 8834
8644a673 8835 return false;
ebfd146a
IR
8836 }
8837
ebfd146a
IR
8838 return true;
8839}
8840
8841
8842/* Function vect_transform_stmt.
8843
8844 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8845
8846bool
355fe088 8847vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 8848 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
8849 slp_instance slp_node_instance)
8850{
8851 bool is_store = false;
355fe088 8852 gimple *vec_stmt = NULL;
ebfd146a 8853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 8854 bool done;
ebfd146a 8855
fce57248 8856 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 8857 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 8858
ebfd146a
IR
8859 switch (STMT_VINFO_TYPE (stmt_info))
8860 {
8861 case type_demotion_vec_info_type:
ebfd146a 8862 case type_promotion_vec_info_type:
ebfd146a
IR
8863 case type_conversion_vec_info_type:
8864 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8865 gcc_assert (done);
8866 break;
8867
8868 case induc_vec_info_type:
e7baeb39 8869 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
8870 gcc_assert (done);
8871 break;
8872
9dc3f7de
IR
8873 case shift_vec_info_type:
8874 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8875 gcc_assert (done);
8876 break;
8877
ebfd146a
IR
8878 case op_vec_info_type:
8879 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8880 gcc_assert (done);
8881 break;
8882
8883 case assignment_vec_info_type:
8884 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8885 gcc_assert (done);
8886 break;
8887
8888 case load_vec_info_type:
b8698a0f 8889 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
8890 slp_node_instance);
8891 gcc_assert (done);
8892 break;
8893
8894 case store_vec_info_type:
8895 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8896 gcc_assert (done);
0d0293ac 8897 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
8898 {
8899 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 8900 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
8901 one are skipped, and there vec_stmt_info shouldn't be freed
8902 meanwhile. */
0d0293ac 8903 *grouped_store = true;
ebfd146a
IR
8904 if (STMT_VINFO_VEC_STMT (stmt_info))
8905 is_store = true;
8906 }
8907 else
8908 is_store = true;
8909 break;
8910
8911 case condition_vec_info_type:
f7e531cf 8912 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
8913 gcc_assert (done);
8914 break;
8915
42fd8198
IE
8916 case comparison_vec_info_type:
8917 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8918 gcc_assert (done);
8919 break;
8920
ebfd146a 8921 case call_vec_info_type:
190c2236 8922 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 8923 stmt = gsi_stmt (*gsi);
8e4284d0 8924 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
5ce9450f 8925 is_store = true;
ebfd146a
IR
8926 break;
8927
0136f8f0
AH
8928 case call_simd_clone_vec_info_type:
8929 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8930 stmt = gsi_stmt (*gsi);
8931 break;
8932
ebfd146a 8933 case reduc_vec_info_type:
891ad31c
RB
8934 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8935 slp_node_instance);
ebfd146a
IR
8936 gcc_assert (done);
8937 break;
8938
8939 default:
8940 if (!STMT_VINFO_LIVE_P (stmt_info))
8941 {
73fbfcad 8942 if (dump_enabled_p ())
78c60e3d 8943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8944 "stmt not supported.\n");
ebfd146a
IR
8945 gcc_unreachable ();
8946 }
8947 }
8948
225ce44b
RB
8949 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8950 This would break hybrid SLP vectorization. */
8951 if (slp_node)
d90f8440
RB
8952 gcc_assert (!vec_stmt
8953 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 8954
ebfd146a
IR
8955 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8956 is being vectorized, but outside the immediately enclosing loop. */
8957 if (vec_stmt
a70d6342
IR
8958 && STMT_VINFO_LOOP_VINFO (stmt_info)
8959 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8960 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
8961 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8962 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 8963 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 8964 vect_used_in_outer_by_reduction))
ebfd146a 8965 {
a70d6342
IR
8966 struct loop *innerloop = LOOP_VINFO_LOOP (
8967 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
8968 imm_use_iterator imm_iter;
8969 use_operand_p use_p;
8970 tree scalar_dest;
355fe088 8971 gimple *exit_phi;
ebfd146a 8972
73fbfcad 8973 if (dump_enabled_p ())
78c60e3d 8974 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 8975 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
8976
8977 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8978 (to be used when vectorizing outer-loop stmts that use the DEF of
8979 STMT). */
8980 if (gimple_code (stmt) == GIMPLE_PHI)
8981 scalar_dest = PHI_RESULT (stmt);
8982 else
8983 scalar_dest = gimple_assign_lhs (stmt);
8984
8985 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8986 {
8987 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8988 {
8989 exit_phi = USE_STMT (use_p);
8990 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8991 }
8992 }
8993 }
8994
8995 /* Handle stmts whose DEF is used outside the loop-nest that is
8996 being vectorized. */
68a0f2ff 8997 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 8998 {
68a0f2ff 8999 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
ebfd146a
IR
9000 gcc_assert (done);
9001 }
9002
9003 if (vec_stmt)
83197f37 9004 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9005
b8698a0f 9006 return is_store;
ebfd146a
IR
9007}
9008
9009
b8698a0f 9010/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9011 stmt_vec_info. */
9012
9013void
355fe088 9014vect_remove_stores (gimple *first_stmt)
ebfd146a 9015{
355fe088
TS
9016 gimple *next = first_stmt;
9017 gimple *tmp;
ebfd146a
IR
9018 gimple_stmt_iterator next_si;
9019
9020 while (next)
9021 {
78048b1c
JJ
9022 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9023
9024 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9025 if (is_pattern_stmt_p (stmt_info))
9026 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9027 /* Free the attached stmt_vec_info and remove the stmt. */
9028 next_si = gsi_for_stmt (next);
3d3f2249 9029 unlink_stmt_vdef (next);
ebfd146a 9030 gsi_remove (&next_si, true);
3d3f2249 9031 release_defs (next);
ebfd146a
IR
9032 free_stmt_vec_info (next);
9033 next = tmp;
9034 }
9035}
9036
9037
9038/* Function new_stmt_vec_info.
9039
9040 Create and initialize a new stmt_vec_info struct for STMT. */
9041
9042stmt_vec_info
310213d4 9043new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9044{
9045 stmt_vec_info res;
9046 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9047
9048 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9049 STMT_VINFO_STMT (res) = stmt;
310213d4 9050 res->vinfo = vinfo;
8644a673 9051 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9052 STMT_VINFO_LIVE_P (res) = false;
9053 STMT_VINFO_VECTYPE (res) = NULL;
9054 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9055 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9056 STMT_VINFO_IN_PATTERN_P (res) = false;
9057 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9058 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9059 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9060 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9061 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9062
ebfd146a
IR
9063 if (gimple_code (stmt) == GIMPLE_PHI
9064 && is_loop_header_bb_p (gimple_bb (stmt)))
9065 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9066 else
8644a673
IR
9067 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9068
9771b263 9069 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9070 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9071 STMT_VINFO_NUM_SLP_USES (res) = 0;
9072
e14c1050
IR
9073 GROUP_FIRST_ELEMENT (res) = NULL;
9074 GROUP_NEXT_ELEMENT (res) = NULL;
9075 GROUP_SIZE (res) = 0;
9076 GROUP_STORE_COUNT (res) = 0;
9077 GROUP_GAP (res) = 0;
9078 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
9079
9080 return res;
9081}
9082
9083
9084/* Create a hash table for stmt_vec_info. */
9085
9086void
9087init_stmt_vec_info_vec (void)
9088{
9771b263
DN
9089 gcc_assert (!stmt_vec_info_vec.exists ());
9090 stmt_vec_info_vec.create (50);
ebfd146a
IR
9091}
9092
9093
9094/* Free hash table for stmt_vec_info. */
9095
9096void
9097free_stmt_vec_info_vec (void)
9098{
93675444 9099 unsigned int i;
3161455c 9100 stmt_vec_info info;
93675444
JJ
9101 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9102 if (info != NULL)
3161455c 9103 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
9104 gcc_assert (stmt_vec_info_vec.exists ());
9105 stmt_vec_info_vec.release ();
ebfd146a
IR
9106}
9107
9108
9109/* Free stmt vectorization related info. */
9110
9111void
355fe088 9112free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9113{
9114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9115
9116 if (!stmt_info)
9117 return;
9118
78048b1c
JJ
9119 /* Check if this statement has a related "pattern stmt"
9120 (introduced by the vectorizer during the pattern recognition
9121 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9122 too. */
9123 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9124 {
9125 stmt_vec_info patt_info
9126 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9127 if (patt_info)
9128 {
363477c0 9129 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9130 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9131 gimple_set_bb (patt_stmt, NULL);
9132 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9133 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9134 release_ssa_name (lhs);
363477c0
JJ
9135 if (seq)
9136 {
9137 gimple_stmt_iterator si;
9138 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9139 {
355fe088 9140 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9141 gimple_set_bb (seq_stmt, NULL);
7532abf2 9142 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9143 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9144 release_ssa_name (lhs);
9145 free_stmt_vec_info (seq_stmt);
9146 }
363477c0 9147 }
f0281fde 9148 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9149 }
9150 }
9151
9771b263 9152 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9153 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9154 set_vinfo_for_stmt (stmt, NULL);
9155 free (stmt_info);
9156}
9157
9158
bb67d9c7 9159/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9160
bb67d9c7 9161 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9162 by the target. */
9163
bb67d9c7 9164static tree
86e36728 9165get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9166{
c7d97b28 9167 tree orig_scalar_type = scalar_type;
3bd8f481 9168 scalar_mode inner_mode;
ef4bddc2 9169 machine_mode simd_mode;
86e36728 9170 poly_uint64 nunits;
ebfd146a
IR
9171 tree vectype;
9172
3bd8f481
RS
9173 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9174 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9175 return NULL_TREE;
9176
3bd8f481 9177 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9178
7b7b1813
RG
9179 /* For vector types of elements whose mode precision doesn't
9180 match their types precision we use a element type of mode
9181 precision. The vectorization routines will have to make sure
48f2e373
RB
9182 they support the proper result truncation/extension.
9183 We also make sure to build vector types with INTEGER_TYPE
9184 component type only. */
6d7971b8 9185 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9186 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9187 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9188 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9189 TYPE_UNSIGNED (scalar_type));
6d7971b8 9190
ccbf5bb4
RG
9191 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9192 When the component mode passes the above test simply use a type
9193 corresponding to that mode. The theory is that any use that
9194 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9195 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9196 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9197 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9198
9199 /* We can't build a vector type of elements with alignment bigger than
9200 their size. */
dfc2e2ac 9201 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9202 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9203 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9204
dfc2e2ac
RB
9205 /* If we felt back to using the mode fail if there was
9206 no scalar type for it. */
9207 if (scalar_type == NULL_TREE)
9208 return NULL_TREE;
9209
bb67d9c7
RG
9210 /* If no size was supplied use the mode the target prefers. Otherwise
9211 lookup a vector mode of the specified size. */
86e36728 9212 if (known_eq (size, 0U))
bb67d9c7 9213 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9214 else if (!multiple_p (size, nbytes, &nunits)
9215 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9216 return NULL_TREE;
4c8fd8ac 9217 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9218 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9219 return NULL_TREE;
ebfd146a
IR
9220
9221 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9222
9223 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9224 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9225 return NULL_TREE;
ebfd146a 9226
c7d97b28
RB
9227 /* Re-attach the address-space qualifier if we canonicalized the scalar
9228 type. */
9229 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9230 return build_qualified_type
9231 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9232
ebfd146a
IR
9233 return vectype;
9234}
9235
86e36728 9236poly_uint64 current_vector_size;
bb67d9c7
RG
9237
9238/* Function get_vectype_for_scalar_type.
9239
9240 Returns the vector type corresponding to SCALAR_TYPE as supported
9241 by the target. */
9242
9243tree
9244get_vectype_for_scalar_type (tree scalar_type)
9245{
9246 tree vectype;
9247 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9248 current_vector_size);
9249 if (vectype
86e36728 9250 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9251 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9252 return vectype;
9253}
9254
42fd8198
IE
9255/* Function get_mask_type_for_scalar_type.
9256
9257 Returns the mask type corresponding to a result of comparison
9258 of vectors of specified SCALAR_TYPE as supported by target. */
9259
9260tree
9261get_mask_type_for_scalar_type (tree scalar_type)
9262{
9263 tree vectype = get_vectype_for_scalar_type (scalar_type);
9264
9265 if (!vectype)
9266 return NULL;
9267
9268 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9269 current_vector_size);
9270}
9271
b690cc0f
RG
9272/* Function get_same_sized_vectype
9273
9274 Returns a vector type corresponding to SCALAR_TYPE of size
9275 VECTOR_TYPE if supported by the target. */
9276
9277tree
bb67d9c7 9278get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 9279{
2568d8a1 9280 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
9281 return build_same_sized_truth_vector_type (vector_type);
9282
bb67d9c7
RG
9283 return get_vectype_for_scalar_type_and_size
9284 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
9285}
9286
ebfd146a
IR
9287/* Function vect_is_simple_use.
9288
9289 Input:
81c40241
RB
9290 VINFO - the vect info of the loop or basic block that is being vectorized.
9291 OPERAND - operand in the loop or bb.
9292 Output:
9293 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9294 DT - the type of definition
ebfd146a
IR
9295
9296 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 9297 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 9298 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 9299 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
9300 is the case in reduction/induction computations).
9301 For basic blocks, supportable operands are constants and bb invariants.
9302 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
9303
9304bool
81c40241
RB
9305vect_is_simple_use (tree operand, vec_info *vinfo,
9306 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 9307{
ebfd146a 9308 *def_stmt = NULL;
3fc356dc 9309 *dt = vect_unknown_def_type;
b8698a0f 9310
73fbfcad 9311 if (dump_enabled_p ())
ebfd146a 9312 {
78c60e3d
SS
9313 dump_printf_loc (MSG_NOTE, vect_location,
9314 "vect_is_simple_use: operand ");
9315 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 9316 dump_printf (MSG_NOTE, "\n");
ebfd146a 9317 }
b8698a0f 9318
b758f602 9319 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
9320 {
9321 *dt = vect_constant_def;
9322 return true;
9323 }
b8698a0f 9324
ebfd146a
IR
9325 if (is_gimple_min_invariant (operand))
9326 {
8644a673 9327 *dt = vect_external_def;
ebfd146a
IR
9328 return true;
9329 }
9330
ebfd146a
IR
9331 if (TREE_CODE (operand) != SSA_NAME)
9332 {
73fbfcad 9333 if (dump_enabled_p ())
af29617a
AH
9334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9335 "not ssa-name.\n");
ebfd146a
IR
9336 return false;
9337 }
b8698a0f 9338
3fc356dc 9339 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 9340 {
3fc356dc
RB
9341 *dt = vect_external_def;
9342 return true;
ebfd146a
IR
9343 }
9344
3fc356dc 9345 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 9346 if (dump_enabled_p ())
ebfd146a 9347 {
78c60e3d
SS
9348 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9349 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
9350 }
9351
61d371eb 9352 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 9353 *dt = vect_external_def;
ebfd146a
IR
9354 else
9355 {
3fc356dc 9356 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 9357 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
9358 }
9359
2e8ab70c
RB
9360 if (dump_enabled_p ())
9361 {
9362 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9363 switch (*dt)
9364 {
9365 case vect_uninitialized_def:
9366 dump_printf (MSG_NOTE, "uninitialized\n");
9367 break;
9368 case vect_constant_def:
9369 dump_printf (MSG_NOTE, "constant\n");
9370 break;
9371 case vect_external_def:
9372 dump_printf (MSG_NOTE, "external\n");
9373 break;
9374 case vect_internal_def:
9375 dump_printf (MSG_NOTE, "internal\n");
9376 break;
9377 case vect_induction_def:
9378 dump_printf (MSG_NOTE, "induction\n");
9379 break;
9380 case vect_reduction_def:
9381 dump_printf (MSG_NOTE, "reduction\n");
9382 break;
9383 case vect_double_reduction_def:
9384 dump_printf (MSG_NOTE, "double reduction\n");
9385 break;
9386 case vect_nested_cycle:
9387 dump_printf (MSG_NOTE, "nested cycle\n");
9388 break;
9389 case vect_unknown_def_type:
9390 dump_printf (MSG_NOTE, "unknown\n");
9391 break;
9392 }
9393 }
9394
81c40241 9395 if (*dt == vect_unknown_def_type)
ebfd146a 9396 {
73fbfcad 9397 if (dump_enabled_p ())
78c60e3d 9398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9399 "Unsupported pattern.\n");
ebfd146a
IR
9400 return false;
9401 }
9402
ebfd146a
IR
9403 switch (gimple_code (*def_stmt))
9404 {
9405 case GIMPLE_PHI:
ebfd146a 9406 case GIMPLE_ASSIGN:
ebfd146a 9407 case GIMPLE_CALL:
81c40241 9408 break;
ebfd146a 9409 default:
73fbfcad 9410 if (dump_enabled_p ())
78c60e3d 9411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9412 "unsupported defining stmt:\n");
ebfd146a
IR
9413 return false;
9414 }
9415
9416 return true;
9417}
9418
81c40241 9419/* Function vect_is_simple_use.
b690cc0f 9420
81c40241 9421 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
9422 type of OPERAND and stores it to *VECTYPE. If the definition of
9423 OPERAND is vect_uninitialized_def, vect_constant_def or
9424 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9425 is responsible to compute the best suited vector type for the
9426 scalar operand. */
9427
9428bool
81c40241
RB
9429vect_is_simple_use (tree operand, vec_info *vinfo,
9430 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 9431{
81c40241 9432 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
9433 return false;
9434
9435 /* Now get a vector type if the def is internal, otherwise supply
9436 NULL_TREE and leave it up to the caller to figure out a proper
9437 type for the use stmt. */
9438 if (*dt == vect_internal_def
9439 || *dt == vect_induction_def
9440 || *dt == vect_reduction_def
9441 || *dt == vect_double_reduction_def
9442 || *dt == vect_nested_cycle)
9443 {
9444 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
9445
9446 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9447 && !STMT_VINFO_RELEVANT (stmt_info)
9448 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 9449 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 9450
b690cc0f
RG
9451 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9452 gcc_assert (*vectype != NULL_TREE);
9453 }
9454 else if (*dt == vect_uninitialized_def
9455 || *dt == vect_constant_def
9456 || *dt == vect_external_def)
9457 *vectype = NULL_TREE;
9458 else
9459 gcc_unreachable ();
9460
9461 return true;
9462}
9463
ebfd146a
IR
9464
9465/* Function supportable_widening_operation
9466
b8698a0f
L
9467 Check whether an operation represented by the code CODE is a
9468 widening operation that is supported by the target platform in
b690cc0f
RG
9469 vector form (i.e., when operating on arguments of type VECTYPE_IN
9470 producing a result of type VECTYPE_OUT).
b8698a0f 9471
ebfd146a
IR
9472 Widening operations we currently support are NOP (CONVERT), FLOAT
9473 and WIDEN_MULT. This function checks if these operations are supported
9474 by the target platform either directly (via vector tree-codes), or via
9475 target builtins.
9476
9477 Output:
b8698a0f
L
9478 - CODE1 and CODE2 are codes of vector operations to be used when
9479 vectorizing the operation, if available.
ebfd146a
IR
9480 - MULTI_STEP_CVT determines the number of required intermediate steps in
9481 case of multi-step conversion (like char->short->int - in that case
9482 MULTI_STEP_CVT will be 1).
b8698a0f
L
9483 - INTERM_TYPES contains the intermediate type required to perform the
9484 widening operation (short in the above example). */
ebfd146a
IR
9485
9486bool
355fe088 9487supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 9488 tree vectype_out, tree vectype_in,
ebfd146a
IR
9489 enum tree_code *code1, enum tree_code *code2,
9490 int *multi_step_cvt,
9771b263 9491 vec<tree> *interm_types)
ebfd146a
IR
9492{
9493 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9494 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 9495 struct loop *vect_loop = NULL;
ef4bddc2 9496 machine_mode vec_mode;
81f40b79 9497 enum insn_code icode1, icode2;
ebfd146a 9498 optab optab1, optab2;
b690cc0f
RG
9499 tree vectype = vectype_in;
9500 tree wide_vectype = vectype_out;
ebfd146a 9501 enum tree_code c1, c2;
4a00c761
JJ
9502 int i;
9503 tree prev_type, intermediate_type;
ef4bddc2 9504 machine_mode intermediate_mode, prev_mode;
4a00c761 9505 optab optab3, optab4;
ebfd146a 9506
4a00c761 9507 *multi_step_cvt = 0;
4ef69dfc
IR
9508 if (loop_info)
9509 vect_loop = LOOP_VINFO_LOOP (loop_info);
9510
ebfd146a
IR
9511 switch (code)
9512 {
9513 case WIDEN_MULT_EXPR:
6ae6116f
RH
9514 /* The result of a vectorized widening operation usually requires
9515 two vectors (because the widened results do not fit into one vector).
 9516 The vector results would normally be expected to be
9517 generated in the same order as in the original scalar computation,
9518 i.e. if 8 results are generated in each vector iteration, they are
9519 to be organized as follows:
9520 vect1: [res1,res2,res3,res4],
9521 vect2: [res5,res6,res7,res8].
9522
9523 However, in the special case that the result of the widening
9524 operation is used in a reduction computation only, the order doesn't
9525 matter (because when vectorizing a reduction we change the order of
9526 the computation). Some targets can take advantage of this and
 9527 generate more efficient code. For example, targets like Altivec,
 9528 which support widen_mult using a sequence of {mult_even,mult_odd},
9529 generate the following vectors:
9530 vect1: [res1,res3,res5,res7],
9531 vect2: [res2,res4,res6,res8].
9532
9533 When vectorizing outer-loops, we execute the inner-loop sequentially
9534 (each vectorized inner-loop iteration contributes to VF outer-loop
 9535 iterations in parallel). We therefore don't allow changing the
9536 order of the computation in the inner-loop during outer-loop
9537 vectorization. */
9538 /* TODO: Another case in which order doesn't *really* matter is when we
9539 widen and then contract again, e.g. (short)((int)x * y >> 8).
9540 Normally, pack_trunc performs an even/odd permute, whereas the
9541 repack from an even/odd expansion would be an interleave, which
9542 would be significantly simpler for e.g. AVX2. */
9543 /* In any case, in order to avoid duplicating the code below, recurse
9544 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9545 are properly set up for the caller. If we fail, we'll continue with
9546 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9547 if (vect_loop
9548 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9549 && !nested_in_vect_loop_p (vect_loop, stmt)
9550 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9551 stmt, vectype_out, vectype_in,
a86ec597
RH
9552 code1, code2, multi_step_cvt,
9553 interm_types))
ebc047a2
CH
9554 {
 9555 /* Elements in a vector with the vect_used_by_reduction property cannot
 9556 be reordered if the use chain with this property does not have the
 9557 same operation. One such example is s += a * b, where elements
9558 in a and b cannot be reordered. Here we check if the vector defined
9559 by STMT is only directly used in the reduction statement. */
9560 tree lhs = gimple_assign_lhs (stmt);
9561 use_operand_p dummy;
355fe088 9562 gimple *use_stmt;
ebc047a2
CH
9563 stmt_vec_info use_stmt_info = NULL;
9564 if (single_imm_use (lhs, &dummy, &use_stmt)
9565 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9566 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9567 return true;
9568 }
4a00c761
JJ
9569 c1 = VEC_WIDEN_MULT_LO_EXPR;
9570 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
9571 break;
9572
81c40241
RB
9573 case DOT_PROD_EXPR:
9574 c1 = DOT_PROD_EXPR;
9575 c2 = DOT_PROD_EXPR;
9576 break;
9577
9578 case SAD_EXPR:
9579 c1 = SAD_EXPR;
9580 c2 = SAD_EXPR;
9581 break;
9582
6ae6116f
RH
9583 case VEC_WIDEN_MULT_EVEN_EXPR:
9584 /* Support the recursion induced just above. */
9585 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9586 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9587 break;
9588
36ba4aae 9589 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
9590 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9591 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
9592 break;
9593
ebfd146a 9594 CASE_CONVERT:
4a00c761
JJ
9595 c1 = VEC_UNPACK_LO_EXPR;
9596 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
9597 break;
9598
9599 case FLOAT_EXPR:
4a00c761
JJ
9600 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9601 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
9602 break;
9603
9604 case FIX_TRUNC_EXPR:
9605 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9606 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9607 computing the operation. */
9608 return false;
9609
9610 default:
9611 gcc_unreachable ();
9612 }
9613
6ae6116f 9614 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 9615 std::swap (c1, c2);
4a00c761 9616
ebfd146a
IR
9617 if (code == FIX_TRUNC_EXPR)
9618 {
9619 /* The signedness is determined from output operand. */
b690cc0f
RG
9620 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9621 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
9622 }
9623 else
9624 {
9625 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9626 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9627 }
9628
9629 if (!optab1 || !optab2)
9630 return false;
9631
9632 vec_mode = TYPE_MODE (vectype);
947131ba
RS
9633 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9634 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9635 return false;
9636
4a00c761
JJ
9637 *code1 = c1;
9638 *code2 = c2;
9639
9640 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9641 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
9642 /* For scalar masks we may have different boolean
9643 vector types having the same QImode. Thus we
 9644 add an additional check on the number of elements. */
9645 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9646 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9647 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
4a00c761 9648
b8698a0f 9649 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 9650 types. */
ebfd146a 9651
4a00c761
JJ
9652 prev_type = vectype;
9653 prev_mode = vec_mode;
b8698a0f 9654
4a00c761
JJ
9655 if (!CONVERT_EXPR_CODE_P (code))
9656 return false;
b8698a0f 9657
4a00c761
JJ
9658 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 9659 intermediate steps in the promotion sequence. We try
 9660 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9661 not. */
9771b263 9662 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9663 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9664 {
9665 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9666 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9667 {
9668 intermediate_type
9669 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9670 current_vector_size);
9671 if (intermediate_mode != TYPE_MODE (intermediate_type))
9672 return false;
9673 }
9674 else
9675 intermediate_type
9676 = lang_hooks.types.type_for_mode (intermediate_mode,
9677 TYPE_UNSIGNED (prev_type));
9678
4a00c761
JJ
9679 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9680 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9681
9682 if (!optab3 || !optab4
9683 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9684 || insn_data[icode1].operand[0].mode != intermediate_mode
9685 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9686 || insn_data[icode2].operand[0].mode != intermediate_mode
9687 || ((icode1 = optab_handler (optab3, intermediate_mode))
9688 == CODE_FOR_nothing)
9689 || ((icode2 = optab_handler (optab4, intermediate_mode))
9690 == CODE_FOR_nothing))
9691 break;
ebfd146a 9692
9771b263 9693 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9694 (*multi_step_cvt)++;
9695
9696 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9697 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
9698 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9699 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9700 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
4a00c761
JJ
9701
9702 prev_type = intermediate_type;
9703 prev_mode = intermediate_mode;
ebfd146a
IR
9704 }
9705
9771b263 9706 interm_types->release ();
4a00c761 9707 return false;
ebfd146a
IR
9708}
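/* Editor's note: an illustrative usage sketch, not part of the original
   file.  It assumes STMT is a widening conversion statement whose input and
   output vector types are already known; the function name is hypothetical.
   For a char->int conversion this would typically return
   CODE1/CODE2 = VEC_UNPACK_LO/HI_EXPR with MULTI_STEP_CVT = 1 and a short
   vector type recorded in INTERM_TYPES, as described above.  */

static bool
example_query_widening_support (gimple *stmt, tree vectype_out,
				tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  bool ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
					    vectype_in, &code1, &code2,
					    &multi_step_cvt, &interm_types);
  /* Callers such as vectorizable_conversion release INTERM_TYPES once the
     intermediate types have been used.  */
  interm_types.release ();
  return ok;
}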
9709
9710
9711/* Function supportable_narrowing_operation
9712
b8698a0f
L
9713 Check whether an operation represented by the code CODE is a
9714 narrowing operation that is supported by the target platform in
b690cc0f
RG
9715 vector form (i.e., when operating on arguments of type VECTYPE_IN
9716 and producing a result of type VECTYPE_OUT).
b8698a0f 9717
ebfd146a 9718 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 9719 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
9720 the target platform directly via vector tree-codes.
9721
9722 Output:
b8698a0f
L
9723 - CODE1 is the code of a vector operation to be used when
9724 vectorizing the operation, if available.
ebfd146a
IR
9725 - MULTI_STEP_CVT determines the number of required intermediate steps in
9726 case of multi-step conversion (like int->short->char - in that case
9727 MULTI_STEP_CVT will be 1).
9728 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 9729 narrowing operation (short in the above example). */
ebfd146a
IR
9730
9731bool
9732supportable_narrowing_operation (enum tree_code code,
b690cc0f 9733 tree vectype_out, tree vectype_in,
ebfd146a 9734 enum tree_code *code1, int *multi_step_cvt,
9771b263 9735 vec<tree> *interm_types)
ebfd146a 9736{
ef4bddc2 9737 machine_mode vec_mode;
ebfd146a
IR
9738 enum insn_code icode1;
9739 optab optab1, interm_optab;
b690cc0f
RG
9740 tree vectype = vectype_in;
9741 tree narrow_vectype = vectype_out;
ebfd146a 9742 enum tree_code c1;
3ae0661a 9743 tree intermediate_type, prev_type;
ef4bddc2 9744 machine_mode intermediate_mode, prev_mode;
ebfd146a 9745 int i;
4a00c761 9746 bool uns;
ebfd146a 9747
4a00c761 9748 *multi_step_cvt = 0;
ebfd146a
IR
9749 switch (code)
9750 {
9751 CASE_CONVERT:
9752 c1 = VEC_PACK_TRUNC_EXPR;
9753 break;
9754
9755 case FIX_TRUNC_EXPR:
9756 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9757 break;
9758
9759 case FLOAT_EXPR:
9760 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9761 tree code and optabs used for computing the operation. */
9762 return false;
9763
9764 default:
9765 gcc_unreachable ();
9766 }
9767
9768 if (code == FIX_TRUNC_EXPR)
9769 /* The signedness is determined from output operand. */
b690cc0f 9770 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
9771 else
9772 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9773
9774 if (!optab1)
9775 return false;
9776
9777 vec_mode = TYPE_MODE (vectype);
947131ba 9778 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9779 return false;
9780
4a00c761
JJ
9781 *code1 = c1;
9782
9783 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
9784 /* For scalar masks we may have different boolean
9785 vector types having the same QImode. Thus we
 9786 add an additional check on the number of elements. */
9787 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9788 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9789 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 9790
ebfd146a
IR
9791 /* Check if it's a multi-step conversion that can be done using intermediate
9792 types. */
4a00c761 9793 prev_mode = vec_mode;
3ae0661a 9794 prev_type = vectype;
4a00c761
JJ
9795 if (code == FIX_TRUNC_EXPR)
9796 uns = TYPE_UNSIGNED (vectype_out);
9797 else
9798 uns = TYPE_UNSIGNED (vectype);
9799
9800 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9801 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9802 costly than signed. */
9803 if (code == FIX_TRUNC_EXPR && uns)
9804 {
9805 enum insn_code icode2;
9806
9807 intermediate_type
9808 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9809 interm_optab
9810 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 9811 if (interm_optab != unknown_optab
4a00c761
JJ
9812 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9813 && insn_data[icode1].operand[0].mode
9814 == insn_data[icode2].operand[0].mode)
9815 {
9816 uns = false;
9817 optab1 = interm_optab;
9818 icode1 = icode2;
9819 }
9820 }
ebfd146a 9821
4a00c761
JJ
9822 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 9823 intermediate steps in the narrowing sequence. We try
9824 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 9825 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9826 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9827 {
9828 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9829 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9830 {
9831 intermediate_type
9832 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9833 current_vector_size);
9834 if (intermediate_mode != TYPE_MODE (intermediate_type))
9835 return false;
9836 }
9837 else
9838 intermediate_type
9839 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
9840 interm_optab
9841 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9842 optab_default);
9843 if (!interm_optab
9844 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9845 || insn_data[icode1].operand[0].mode != intermediate_mode
9846 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9847 == CODE_FOR_nothing))
9848 break;
9849
9771b263 9850 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9851 (*multi_step_cvt)++;
9852
9853 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
9854 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9855 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9856 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
9857
9858 prev_mode = intermediate_mode;
3ae0661a 9859 prev_type = intermediate_type;
4a00c761 9860 optab1 = interm_optab;
ebfd146a
IR
9861 }
9862
9771b263 9863 interm_types->release ();
4a00c761 9864 return false;
ebfd146a 9865}
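/* Editor's note: an illustrative usage sketch, not part of the original
   file.  It assumes the input and output vector types of a narrowing
   conversion are already known; the function name is hypothetical.  For an
   int->char conversion this would typically return CODE1 = VEC_PACK_TRUNC_EXPR
   with MULTI_STEP_CVT = 1 and a short vector type in INTERM_TYPES, matching
   the int->short->char example in the comment above.  */

static bool
example_query_narrowing_support (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_out,
					     vectype_in, &code1,
					     &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}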