]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-stmts.c
poly_int: expand_assignment
[thirdparty/gcc.git] / gcc / tree-vect-stmts.c
CommitLineData
ebfd146a 1/* Statement Analysis and Transformation for Vectorization
85ec4feb 2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
957060b5 35#include "dumpfile.h"
c7131fb2 36#include "alias.h"
40e23961 37#include "fold-const.h"
d8a2d370 38#include "stor-layout.h"
2fb9a547 39#include "tree-eh.h"
45b0be94 40#include "gimplify.h"
5be5c238 41#include "gimple-iterator.h"
18f429e2 42#include "gimplify-me.h"
442b4905 43#include "tree-cfg.h"
e28030cf 44#include "tree-ssa-loop-manip.h"
ebfd146a 45#include "cfgloop.h"
0136f8f0
AH
46#include "tree-ssa-loop.h"
47#include "tree-scalar-evolution.h"
ebfd146a 48#include "tree-vectorizer.h"
9b2b7279 49#include "builtins.h"
70439f0d 50#include "internal-fn.h"
5ebaa477 51#include "tree-vector-builder.h"
f151c9e1 52#include "vec-perm-indices.h"
ebfd146a 53
7ee2468b
SB
54/* For lang_hooks.types.type_for_mode. */
55#include "langhooks.h"
ebfd146a 56
2de001ee
RS
57/* Says whether a statement is a load, a store of a vectorized statement
58 result, or a store of an invariant value. */
59enum vec_load_store_type {
60 VLS_LOAD,
61 VLS_STORE,
62 VLS_STORE_INVARIANT
63};
64
c3e7ee41
BS
65/* Return the vectorized type for the given statement. */
66
67tree
68stmt_vectype (struct _stmt_vec_info *stmt_info)
69{
70 return STMT_VINFO_VECTYPE (stmt_info);
71}
72
73/* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
75bool
76stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
77{
355fe088 78 gimple *stmt = STMT_VINFO_STMT (stmt_info);
c3e7ee41
BS
79 basic_block bb = gimple_bb (stmt);
80 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
81 struct loop* loop;
82
83 if (!loop_vinfo)
84 return false;
85
86 loop = LOOP_VINFO_LOOP (loop_vinfo);
87
88 return (bb->loop_father == loop->inner);
89}
90
91/* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
94
95unsigned
92345349 96record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
c3e7ee41 97 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92345349 98 int misalign, enum vect_cost_model_location where)
c3e7ee41 99{
cc9fe6bb
JH
100 if ((kind == vector_load || kind == unaligned_load)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
105 kind = vector_scatter_store;
92345349 106 if (body_cost_vec)
c3e7ee41 107 {
92345349 108 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
ddf56386
RB
109 stmt_info_for_cost si = { count, kind,
110 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
111 misalign };
112 body_cost_vec->safe_push (si);
c3e7ee41 113 return (unsigned)
92345349 114 (builtin_vectorization_cost (kind, vectype, misalign) * count);
c3e7ee41
BS
115 }
116 else
310213d4
RB
117 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
118 count, kind, stmt_info, misalign, where);
c3e7ee41
BS
119}
120
272c6793
RS
121/* Return a variable of type ELEM_TYPE[NELEMS]. */
122
123static tree
124create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
125{
126 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
127 "vect_array");
128}
129
130/* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
134
135static tree
355fe088 136read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
272c6793
RS
137 tree array, unsigned HOST_WIDE_INT n)
138{
139 tree vect_type, vect, vect_name, array_ref;
355fe088 140 gimple *new_stmt;
272c6793
RS
141
142 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
143 vect_type = TREE_TYPE (TREE_TYPE (array));
144 vect = vect_create_destination_var (scalar_dest, vect_type);
145 array_ref = build4 (ARRAY_REF, vect_type, array,
146 build_int_cst (size_type_node, n),
147 NULL_TREE, NULL_TREE);
148
149 new_stmt = gimple_build_assign (vect, array_ref);
150 vect_name = make_ssa_name (vect, new_stmt);
151 gimple_assign_set_lhs (new_stmt, vect_name);
152 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
153
154 return vect_name;
155}
156
157/* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
160
161static void
355fe088 162write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
272c6793
RS
163 tree array, unsigned HOST_WIDE_INT n)
164{
165 tree array_ref;
355fe088 166 gimple *new_stmt;
272c6793
RS
167
168 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
171
172 new_stmt = gimple_build_assign (array_ref, vect);
173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
174}
175
176/* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
178 (and its group). */
179
180static tree
44fc7854 181create_array_ref (tree type, tree ptr, tree alias_ptr_type)
272c6793 182{
44fc7854 183 tree mem_ref;
272c6793 184
272c6793
RS
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
644ffefd 187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
272c6793
RS
188 return mem_ref;
189}
190
ebfd146a
IR
191/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192
193/* Function vect_mark_relevant.
194
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196
197static void
355fe088 198vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
97ecdb46 199 enum vect_relevant relevant, bool live_p)
ebfd146a
IR
200{
201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
202 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
203 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
355fe088 204 gimple *pattern_stmt;
ebfd146a 205
73fbfcad 206 if (dump_enabled_p ())
66c16fd9
RB
207 {
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: ", relevant, live_p);
210 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 }
ebfd146a 212
83197f37
IR
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern, in such cases the
216 stmt itself should be marked. */
ebfd146a
IR
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 {
97ecdb46
JJ
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
83197f37 223
97ecdb46
JJ
224 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
225
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE, vect_location,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info = vinfo_for_stmt (pattern_stmt);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 stmt = pattern_stmt;
ebfd146a
IR
235 }
236
237 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
238 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
239 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240
241 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 {
73fbfcad 244 if (dump_enabled_p ())
78c60e3d 245 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 246 "already marked relevant/live.\n");
ebfd146a
IR
247 return;
248 }
249
9771b263 250 worklist->safe_push (stmt);
ebfd146a
IR
251}
252
253
b28ead45
AH
254/* Function is_simple_and_all_uses_invariant
255
256 Return true if STMT is simple and all uses of it are invariant. */
257
258bool
259is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
260{
261 tree op;
262 gimple *def_stmt;
263 ssa_op_iter iter;
264
265 if (!is_gimple_assign (stmt))
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
272 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284}
285
ebfd146a
IR
286/* Function vect_stmt_relevant_p.
287
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298static bool
355fe088 299vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
ebfd146a
IR
300 enum vect_relevant *relevant, bool *live_p)
301{
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
8644a673 308 *relevant = vect_unused_in_scope;
ebfd146a
IR
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
b8698a0f
L
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
8644a673 315 *relevant = vect_used_in_scope;
ebfd146a
IR
316
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
ebfd146a 321 {
73fbfcad 322 if (dump_enabled_p ())
78c60e3d 323 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 324 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 325 *relevant = vect_used_in_scope;
ebfd146a
IR
326 }
327
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 {
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 {
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
335 {
73fbfcad 336 if (dump_enabled_p ())
78c60e3d 337 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 338 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 339
3157b0c2
AO
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
342
ebfd146a
IR
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
347
348 *live_p = true;
349 }
350 }
351 }
352
3a2edf4c
AH
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
b28ead45
AH
355 {
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
360 }
361
ebfd146a
IR
362 return (*live_p || *relevant);
363}
364
365
b8698a0f 366/* Function exist_non_indexing_operands_for_use_p
ebfd146a 367
ff802fa1 368 USE is one of the uses attached to STMT. Check if USE is
ebfd146a
IR
369 used in STMT for anything other than indexing an array. */
370
371static bool
355fe088 372exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
ebfd146a
IR
373{
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
59a05b0c 376
ff802fa1 377 /* USE corresponds to some operand in STMT. If there is no data
ebfd146a
IR
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
59a05b0c 382
ebfd146a
IR
383 /* STMT has a data_ref. FORNOW this means that its of one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
388
389 'var' in the second case corresponds to a def, not a use,
b8698a0f 390 so USE cannot correspond to any operands that are not used
ebfd146a
IR
391 for array indexing.
392
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
ebfd146a
IR
395
396 if (!gimple_assign_copy_p (stmt))
5ce9450f
JJ
397 {
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
400 switch (gimple_call_internal_fn (stmt))
401 {
402 case IFN_MASK_STORE:
403 operand = gimple_call_arg (stmt, 3);
404 if (operand == use)
405 return true;
406 /* FALLTHRU */
407 case IFN_MASK_LOAD:
408 operand = gimple_call_arg (stmt, 2);
409 if (operand == use)
410 return true;
411 break;
412 default:
413 break;
414 }
415 return false;
416 }
417
59a05b0c
EB
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
ebfd146a 420 operand = gimple_assign_rhs1 (stmt);
ebfd146a
IR
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
423
424 if (operand == use)
425 return true;
426
427 return false;
428}
429
430
b8698a0f 431/*
ebfd146a
IR
432 Function process_use.
433
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
b28ead45 436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
ff802fa1 437 that defined USE. This is done by calling mark_relevant and passing it
ebfd146a 438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
aec7ae7d
JJ
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
ebfd146a
IR
441
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
b8698a0f 449 which does not need to be directly vectorized, then the liveness/relevance
ebfd146a 450 of the respective DEF_STMT is left unchanged.
b8698a0f
L
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT cause it had already been processed.
ebfd146a
IR
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
455
456 Return true if everything is as expected. Return false otherwise. */
457
458static bool
b28ead45 459process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
355fe088 460 enum vect_relevant relevant, vec<gimple *> *worklist,
aec7ae7d 461 bool force)
ebfd146a
IR
462{
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
355fe088 467 gimple *def_stmt;
ebfd146a
IR
468 enum vect_def_type dt;
469
b8698a0f 470 /* case 1: we are only interested in uses that need to be vectorized. Uses
ebfd146a 471 that are used for address computation are not considered relevant. */
aec7ae7d 472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
ebfd146a
IR
473 return true;
474
81c40241 475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
b8698a0f 476 {
73fbfcad 477 if (dump_enabled_p ())
78c60e3d 478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 479 "not vectorized: unsupported use in stmt.\n");
ebfd146a
IR
480 return false;
481 }
482
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
485
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
488 {
73fbfcad 489 if (dump_enabled_p ())
e645e942 490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
ebfd146a
IR
491 return true;
492 }
493
b8698a0f
L
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
ebfd146a
IR
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
506 {
73fbfcad 507 if (dump_enabled_p ())
78c60e3d 508 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 509 "reduc-stmt defining reduc-phi in the same nest.\n");
ebfd146a
IR
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
b8698a0f 513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
8644a673 514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
ebfd146a
IR
515 return true;
516 }
517
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
526 {
73fbfcad 527 if (dump_enabled_p ())
78c60e3d 528 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 529 "outer-loop def-stmt defining inner-loop stmt.\n");
7c5222ff 530
ebfd146a
IR
531 switch (relevant)
532 {
8644a673 533 case vect_unused_in_scope:
7c5222ff
IR
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
ebfd146a 536 break;
7c5222ff 537
ebfd146a 538 case vect_used_in_outer_by_reduction:
7c5222ff 539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
ebfd146a
IR
540 relevant = vect_used_by_reduction;
541 break;
7c5222ff 542
ebfd146a 543 case vect_used_in_outer:
7c5222ff 544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
8644a673 545 relevant = vect_used_in_scope;
ebfd146a 546 break;
7c5222ff 547
8644a673 548 case vect_used_in_scope:
ebfd146a
IR
549 break;
550
551 default:
552 gcc_unreachable ();
b8698a0f 553 }
ebfd146a
IR
554 }
555
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
558 ...
559 inner-loop:
560 d = def_stmt
06066f92 561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
ebfd146a
IR
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
564 {
73fbfcad 565 if (dump_enabled_p ())
78c60e3d 566 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 567 "inner-loop def-stmt defining outer-loop stmt.\n");
7c5222ff 568
ebfd146a
IR
569 switch (relevant)
570 {
8644a673 571 case vect_unused_in_scope:
b8698a0f 572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
06066f92 573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
a70d6342 574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
ebfd146a
IR
575 break;
576
ebfd146a 577 case vect_used_by_reduction:
b28ead45 578 case vect_used_only_live:
ebfd146a
IR
579 relevant = vect_used_in_outer_by_reduction;
580 break;
581
8644a673 582 case vect_used_in_scope:
ebfd146a
IR
583 relevant = vect_used_in_outer;
584 break;
585
586 default:
587 gcc_unreachable ();
588 }
589 }
643a9684
RB
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
e294f495
RB
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
643a9684
RB
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
e294f495 596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
643a9684
RB
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
599 {
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
604 }
605
ebfd146a 606
b28ead45 607 vect_mark_relevant (worklist, def_stmt, relevant, false);
ebfd146a
IR
608 return true;
609}
610
611
612/* Function vect_mark_stmts_to_be_vectorized.
613
614 Not all stmts in the loop need to be vectorized. For example:
615
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
620
621 3. j = j + 1
622
623 Stmt 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
625
626 This pass detects such stmts. */
627
628bool
629vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630{
ebfd146a
IR
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
355fe088 635 gimple *stmt;
ebfd146a
IR
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
355fe088 639 gimple *phi;
ebfd146a 640 bool live_p;
b28ead45 641 enum vect_relevant relevant;
ebfd146a 642
73fbfcad 643 if (dump_enabled_p ())
78c60e3d 644 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 645 "=== vect_mark_stmts_to_be_vectorized ===\n");
ebfd146a 646
355fe088 647 auto_vec<gimple *, 64> worklist;
ebfd146a
IR
648
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
651 {
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
b8698a0f 654 {
ebfd146a 655 phi = gsi_stmt (si);
73fbfcad 656 if (dump_enabled_p ())
ebfd146a 657 {
78c60e3d
SS
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
660 }
661
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
97ecdb46 663 vect_mark_relevant (&worklist, phi, relevant, live_p);
ebfd146a
IR
664 }
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
666 {
667 stmt = gsi_stmt (si);
73fbfcad 668 if (dump_enabled_p ())
ebfd146a 669 {
78c60e3d
SS
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 672 }
ebfd146a
IR
673
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
97ecdb46 675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
ebfd146a
IR
676 }
677 }
678
679 /* 2. Process_worklist */
9771b263 680 while (worklist.length () > 0)
ebfd146a
IR
681 {
682 use_operand_p use_p;
683 ssa_op_iter iter;
684
9771b263 685 stmt = worklist.pop ();
73fbfcad 686 if (dump_enabled_p ())
ebfd146a 687 {
78c60e3d
SS
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
690 }
691
b8698a0f 692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
b28ead45
AH
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
ebfd146a
IR
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
ebfd146a 697
b28ead45
AH
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
ebfd146a
IR
700
701 One exception is when STMT has been identified as defining a reduction
b28ead45 702 variable; in this case we set the relevance to vect_used_by_reduction.
ebfd146a 703 This is because we distinguish between two kinds of relevant stmts -
b8698a0f 704 those that are used by a reduction computation, and those that are
ff802fa1 705 (also) used by a regular computation. This allows us later on to
b8698a0f 706 identify stmts that are used solely by a reduction, and therefore the
7c5222ff 707 order of the results that they produce does not have to be kept. */
ebfd146a 708
b28ead45 709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
ebfd146a 710 {
06066f92 711 case vect_reduction_def:
b28ead45
AH
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
06066f92 717 {
b28ead45
AH
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
06066f92 722 }
06066f92 723 break;
b8698a0f 724
06066f92 725 case vect_nested_cycle:
b28ead45
AH
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
06066f92 729 {
73fbfcad 730 if (dump_enabled_p ())
78c60e3d 731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 732 "unsupported use of nested cycle.\n");
7c5222ff 733
06066f92
IR
734 return false;
735 }
b8698a0f
L
736 break;
737
06066f92 738 case vect_double_reduction_def:
b28ead45
AH
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
06066f92 742 {
73fbfcad 743 if (dump_enabled_p ())
78c60e3d 744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 745 "unsupported use of double reduction.\n");
7c5222ff 746
7c5222ff 747 return false;
06066f92 748 }
b8698a0f 749 break;
7c5222ff 750
06066f92
IR
751 default:
752 break;
7c5222ff 753 }
b8698a0f 754
aec7ae7d 755 if (is_pattern_stmt_p (stmt_vinfo))
9d5e7640
IR
756 {
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
761 {
69d2aade
JJ
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
764
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
767 {
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
b28ead45 769 relevant, &worklist, false)
69d2aade 770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
b28ead45 771 relevant, &worklist, false))
566d377a 772 return false;
69d2aade
JJ
773 i = 2;
774 }
775 for (; i < gimple_num_ops (stmt); i++)
9d5e7640 776 {
69d2aade 777 op = gimple_op (stmt, i);
afbe6325 778 if (TREE_CODE (op) == SSA_NAME
b28ead45 779 && !process_use (stmt, op, loop_vinfo, relevant,
afbe6325 780 &worklist, false))
07687835 781 return false;
9d5e7640
IR
782 }
783 }
784 else if (is_gimple_call (stmt))
785 {
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
787 {
788 tree arg = gimple_call_arg (stmt, i);
b28ead45 789 if (!process_use (stmt, arg, loop_vinfo, relevant,
aec7ae7d 790 &worklist, false))
07687835 791 return false;
9d5e7640
IR
792 }
793 }
794 }
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
797 {
798 tree op = USE_FROM_PTR (use_p);
b28ead45 799 if (!process_use (stmt, op, loop_vinfo, relevant,
aec7ae7d 800 &worklist, false))
07687835 801 return false;
9d5e7640 802 }
aec7ae7d 803
3bab6342 804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
aec7ae7d 805 {
134c85ca
RS
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
566d377a 811 return false;
aec7ae7d 812 }
ebfd146a
IR
813 } /* while worklist */
814
ebfd146a
IR
815 return true;
816}
817
818
b8698a0f 819/* Function vect_model_simple_cost.
ebfd146a 820
b8698a0f 821 Models cost for simple operations, i.e. those that only emit ncopies of a
ebfd146a
IR
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
824
825void
b8698a0f 826vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
92345349 827 enum vect_def_type *dt,
4fc5ebf1 828 int ndts,
92345349
BS
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
831{
832 int i;
92345349 833 int inside_cost = 0, prologue_cost = 0;
ebfd146a
IR
834
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
838
4fc5ebf1
JG
839 /* Cost the "broadcast" of a scalar operand in to a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
841 cost model. */
842 for (i = 0; i < ndts; i++)
92345349 843 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
4fc5ebf1 844 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
92345349 845 stmt_info, 0, vect_prologue);
c3e7ee41
BS
846
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
92345349
BS
848 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
849 stmt_info, 0, vect_body);
c3e7ee41 850
73fbfcad 851 if (dump_enabled_p ())
78c60e3d
SS
852 dump_printf_loc (MSG_NOTE, vect_location,
853 "vect_model_simple_cost: inside_cost = %d, "
e645e942 854 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
855}
856
857
8bd37302
BS
858/* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
862
863static void
864vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
865 enum vect_def_type *dt, int pwr)
866{
867 int i, tmp;
92345349 868 int inside_cost = 0, prologue_cost = 0;
c3e7ee41
BS
869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
871 void *target_cost_data;
8bd37302
BS
872
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
876
c3e7ee41
BS
877 if (loop_vinfo)
878 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
879 else
880 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
881
8bd37302
BS
882 for (i = 0; i < pwr + 1; i++)
883 {
884 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
885 (i + 1) : i;
c3e7ee41 886 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
92345349
BS
887 vec_promote_demote, stmt_info, 0,
888 vect_body);
8bd37302
BS
889 }
890
891 /* FORNOW: Assuming maximum 2 args per stmts. */
892 for (i = 0; i < 2; i++)
92345349
BS
893 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
894 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
895 stmt_info, 0, vect_prologue);
8bd37302 896
73fbfcad 897 if (dump_enabled_p ())
78c60e3d
SS
898 dump_printf_loc (MSG_NOTE, vect_location,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 900 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
901}
902
ebfd146a
IR
903/* Function vect_model_store_cost
904
0d0293ac
MM
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
ebfd146a
IR
907
908void
b8698a0f 909vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
2de001ee
RS
910 vect_memory_access_type memory_access_type,
911 enum vect_def_type dt, slp_tree slp_node,
92345349
BS
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
ebfd146a 914{
92345349 915 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f
RS
916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
918 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 919
8644a673 920 if (dt == vect_constant_def || dt == vect_external_def)
92345349
BS
921 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
922 stmt_info, 0, vect_prologue);
ebfd146a 923
892a981f
RS
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node && grouped_access_p)
720f5239 927 {
892a981f
RS
928 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
929 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
720f5239 930 }
ebfd146a 931
892a981f
RS
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
936
272c6793 937 /* We assume that the cost of a single store-lanes instruction is
0d0293ac 938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
272c6793 939 access is instead being provided by a permute-and-store operation,
2de001ee
RS
940 include the cost of the permutes. */
941 if (first_stmt_p
942 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 943 {
e1377713
ES
944 /* Uses a high and low interleave or shuffle operations for each
945 needed permute. */
892a981f 946 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
e1377713 947 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
92345349
BS
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
ebfd146a 950
73fbfcad 951 if (dump_enabled_p ())
78c60e3d 952 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 953 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 954 group_size);
ebfd146a
IR
955 }
956
cee62fee 957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 958 /* Costs of the stores. */
067bc855
RB
959 if (memory_access_type == VMAT_ELEMENTWISE
960 || memory_access_type == VMAT_GATHER_SCATTER)
c5126ce8
RS
961 {
962 /* N scalar stores plus extracting the elements. */
963 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
964 inside_cost += record_stmt_cost (body_cost_vec,
965 ncopies * assumed_nunits,
966 scalar_store, stmt_info, 0, vect_body);
967 }
f2e2a985 968 else
892a981f 969 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
ebfd146a 970
2de001ee
RS
971 if (memory_access_type == VMAT_ELEMENTWISE
972 || memory_access_type == VMAT_STRIDED_SLP)
c5126ce8
RS
973 {
974 /* N scalar stores plus extracting the elements. */
975 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
976 inside_cost += record_stmt_cost (body_cost_vec,
977 ncopies * assumed_nunits,
978 vec_to_scalar, stmt_info, 0, vect_body);
979 }
cee62fee 980
73fbfcad 981 if (dump_enabled_p ())
78c60e3d
SS
982 dump_printf_loc (MSG_NOTE, vect_location,
983 "vect_model_store_cost: inside_cost = %d, "
e645e942 984 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
985}
986
987
720f5239
IR
988/* Calculate cost of DR's memory access. */
989void
990vect_get_store_cost (struct data_reference *dr, int ncopies,
c3e7ee41 991 unsigned int *inside_cost,
92345349 992 stmt_vector_for_cost *body_cost_vec)
720f5239
IR
993{
994 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 995 gimple *stmt = DR_STMT (dr);
c3e7ee41 996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
997
998 switch (alignment_support_scheme)
999 {
1000 case dr_aligned:
1001 {
92345349
BS
1002 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1003 vector_store, stmt_info, 0,
1004 vect_body);
720f5239 1005
73fbfcad 1006 if (dump_enabled_p ())
78c60e3d 1007 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1008 "vect_model_store_cost: aligned.\n");
720f5239
IR
1009 break;
1010 }
1011
1012 case dr_unaligned_supported:
1013 {
720f5239 1014 /* Here, we assign an additional cost for the unaligned store. */
92345349 1015 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1016 unaligned_store, stmt_info,
92345349 1017 DR_MISALIGNMENT (dr), vect_body);
73fbfcad 1018 if (dump_enabled_p ())
78c60e3d
SS
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: unaligned supported by "
e645e942 1021 "hardware.\n");
720f5239
IR
1022 break;
1023 }
1024
38eec4c6
UW
1025 case dr_unaligned_unsupported:
1026 {
1027 *inside_cost = VECT_MAX_COST;
1028
73fbfcad 1029 if (dump_enabled_p ())
78c60e3d 1030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1031 "vect_model_store_cost: unsupported access.\n");
38eec4c6
UW
1032 break;
1033 }
1034
720f5239
IR
1035 default:
1036 gcc_unreachable ();
1037 }
1038}
1039
1040
ebfd146a
IR
1041/* Function vect_model_load_cost
1042
892a981f
RS
1043 Models cost for loads. In the case of grouped accesses, one access has
1044 the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1045 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1046 access scheme chosen. */
1047
1048void
92345349 1049vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
2de001ee
RS
1050 vect_memory_access_type memory_access_type,
1051 slp_tree slp_node,
92345349
BS
1052 stmt_vector_for_cost *prologue_cost_vec,
1053 stmt_vector_for_cost *body_cost_vec)
ebfd146a 1054{
892a981f
RS
1055 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1056 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
92345349 1057 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f 1058 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 1059
892a981f
RS
1060 /* Grouped loads read all elements in the group at once,
1061 so we want the DR for the first statement. */
1062 if (!slp_node && grouped_access_p)
ebfd146a 1063 {
892a981f
RS
1064 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1065 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
ebfd146a
IR
1066 }
1067
892a981f
RS
1068 /* True if we should include any once-per-group costs as well as
1069 the cost of the statement itself. For SLP we only get called
1070 once per group anyhow. */
1071 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1072
272c6793 1073 /* We assume that the cost of a single load-lanes instruction is
0d0293ac 1074 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
272c6793 1075 access is instead being provided by a load-and-permute operation,
2de001ee
RS
1076 include the cost of the permutes. */
1077 if (first_stmt_p
1078 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1079 {
2c23db6d
ES
1080 /* Uses an even and odd extract operations or shuffle operations
1081 for each needed permute. */
892a981f 1082 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2c23db6d
ES
1083 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1084 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1085 stmt_info, 0, vect_body);
ebfd146a 1086
73fbfcad 1087 if (dump_enabled_p ())
e645e942
TJ
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1090 group_size);
ebfd146a
IR
1091 }
1092
1093 /* The loads themselves. */
067bc855
RB
1094 if (memory_access_type == VMAT_ELEMENTWISE
1095 || memory_access_type == VMAT_GATHER_SCATTER)
a82960aa 1096 {
a21892ad
BS
1097 /* N scalar loads plus gathering them into a vector. */
1098 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
c5126ce8 1099 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
92345349 1100 inside_cost += record_stmt_cost (body_cost_vec,
c5126ce8 1101 ncopies * assumed_nunits,
92345349 1102 scalar_load, stmt_info, 0, vect_body);
a82960aa
RG
1103 }
1104 else
892a981f 1105 vect_get_load_cost (dr, ncopies, first_stmt_p,
92345349
BS
1106 &inside_cost, &prologue_cost,
1107 prologue_cost_vec, body_cost_vec, true);
2de001ee
RS
1108 if (memory_access_type == VMAT_ELEMENTWISE
1109 || memory_access_type == VMAT_STRIDED_SLP)
892a981f
RS
1110 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1111 stmt_info, 0, vect_body);
720f5239 1112
73fbfcad 1113 if (dump_enabled_p ())
78c60e3d
SS
1114 dump_printf_loc (MSG_NOTE, vect_location,
1115 "vect_model_load_cost: inside_cost = %d, "
e645e942 1116 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1117}
1118
1119
1120/* Calculate cost of DR's memory access. */
1121void
1122vect_get_load_cost (struct data_reference *dr, int ncopies,
c3e7ee41 1123 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1124 unsigned int *prologue_cost,
1125 stmt_vector_for_cost *prologue_cost_vec,
1126 stmt_vector_for_cost *body_cost_vec,
1127 bool record_prologue_costs)
720f5239
IR
1128{
1129 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 1130 gimple *stmt = DR_STMT (dr);
c3e7ee41 1131 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
1132
1133 switch (alignment_support_scheme)
ebfd146a
IR
1134 {
1135 case dr_aligned:
1136 {
92345349
BS
1137 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1138 stmt_info, 0, vect_body);
ebfd146a 1139
73fbfcad 1140 if (dump_enabled_p ())
78c60e3d 1141 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1142 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1143
1144 break;
1145 }
1146 case dr_unaligned_supported:
1147 {
720f5239 1148 /* Here, we assign an additional cost for the unaligned load. */
92345349 1149 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1150 unaligned_load, stmt_info,
92345349 1151 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1152
73fbfcad 1153 if (dump_enabled_p ())
78c60e3d
SS
1154 dump_printf_loc (MSG_NOTE, vect_location,
1155 "vect_model_load_cost: unaligned supported by "
e645e942 1156 "hardware.\n");
ebfd146a
IR
1157
1158 break;
1159 }
1160 case dr_explicit_realign:
1161 {
92345349
BS
1162 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1163 vector_load, stmt_info, 0, vect_body);
1164 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1165 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1166
1167 /* FIXME: If the misalignment remains fixed across the iterations of
1168 the containing loop, the following cost should be added to the
92345349 1169 prologue costs. */
ebfd146a 1170 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1171 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1172 stmt_info, 0, vect_body);
ebfd146a 1173
73fbfcad 1174 if (dump_enabled_p ())
e645e942
TJ
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: explicit realign\n");
8bd37302 1177
ebfd146a
IR
1178 break;
1179 }
1180 case dr_explicit_realign_optimized:
1181 {
73fbfcad 1182 if (dump_enabled_p ())
e645e942 1183 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1184 "vect_model_load_cost: unaligned software "
e645e942 1185 "pipelined.\n");
ebfd146a
IR
1186
1187 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1188 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1189 if this is an access in a group of loads, which provide grouped
ebfd146a 1190 access, then the above cost should only be considered for one
ff802fa1 1191 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1192 and a realignment op. */
1193
92345349 1194 if (add_realign_cost && record_prologue_costs)
ebfd146a 1195 {
92345349
BS
1196 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1197 vector_stmt, stmt_info,
1198 0, vect_prologue);
ebfd146a 1199 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1200 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1201 vector_stmt, stmt_info,
1202 0, vect_prologue);
ebfd146a
IR
1203 }
1204
92345349
BS
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1206 stmt_info, 0, vect_body);
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1208 stmt_info, 0, vect_body);
8bd37302 1209
73fbfcad 1210 if (dump_enabled_p ())
78c60e3d 1211 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1212 "vect_model_load_cost: explicit realign optimized"
1213 "\n");
8bd37302 1214
ebfd146a
IR
1215 break;
1216 }
1217
38eec4c6
UW
1218 case dr_unaligned_unsupported:
1219 {
1220 *inside_cost = VECT_MAX_COST;
1221
73fbfcad 1222 if (dump_enabled_p ())
78c60e3d 1223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1224 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1225 break;
1226 }
1227
ebfd146a
IR
1228 default:
1229 gcc_unreachable ();
1230 }
ebfd146a
IR
1231}
1232
418b7df3
RG
1233/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1234 the loop preheader for the vectorized stmt STMT. */
ebfd146a 1235
418b7df3 1236static void
355fe088 1237vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
ebfd146a 1238{
ebfd146a 1239 if (gsi)
418b7df3 1240 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1241 else
1242 {
418b7df3 1243 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a 1244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
b8698a0f 1245
a70d6342
IR
1246 if (loop_vinfo)
1247 {
1248 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
418b7df3
RG
1249 basic_block new_bb;
1250 edge pe;
a70d6342
IR
1251
1252 if (nested_in_vect_loop_p (loop, stmt))
1253 loop = loop->inner;
b8698a0f 1254
a70d6342 1255 pe = loop_preheader_edge (loop);
418b7df3 1256 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
a70d6342
IR
1257 gcc_assert (!new_bb);
1258 }
1259 else
1260 {
1261 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1262 basic_block bb;
1263 gimple_stmt_iterator gsi_bb_start;
1264
1265 gcc_assert (bb_vinfo);
1266 bb = BB_VINFO_BB (bb_vinfo);
12aaf609 1267 gsi_bb_start = gsi_after_labels (bb);
418b7df3 1268 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
a70d6342 1269 }
ebfd146a
IR
1270 }
1271
73fbfcad 1272 if (dump_enabled_p ())
ebfd146a 1273 {
78c60e3d
SS
1274 dump_printf_loc (MSG_NOTE, vect_location,
1275 "created new init_stmt: ");
1276 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
ebfd146a 1277 }
418b7df3
RG
1278}
1279
1280/* Function vect_init_vector.
ebfd146a 1281
5467ee52
RG
1282 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1283 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1284 vector type a vector with all elements equal to VAL is created first.
1285 Place the initialization at BSI if it is not NULL. Otherwise, place the
1286 initialization at the loop preheader.
418b7df3
RG
1287 Return the DEF of INIT_STMT.
1288 It will be used in the vectorization of STMT. */
1289
1290tree
355fe088 1291vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
418b7df3 1292{
355fe088 1293 gimple *init_stmt;
418b7df3
RG
1294 tree new_temp;
1295
e412ece4
RB
1296 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1297 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
418b7df3 1298 {
e412ece4
RB
1299 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1300 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
418b7df3 1301 {
5a308cf1
IE
1302 /* Scalar boolean value should be transformed into
1303 all zeros or all ones value before building a vector. */
1304 if (VECTOR_BOOLEAN_TYPE_P (type))
1305 {
b3d51f23
IE
1306 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1307 tree false_val = build_zero_cst (TREE_TYPE (type));
5a308cf1
IE
1308
1309 if (CONSTANT_CLASS_P (val))
1310 val = integer_zerop (val) ? false_val : true_val;
1311 else
1312 {
1313 new_temp = make_ssa_name (TREE_TYPE (type));
1314 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1315 val, true_val, false_val);
1316 vect_init_vector_1 (stmt, init_stmt, gsi);
1317 val = new_temp;
1318 }
1319 }
1320 else if (CONSTANT_CLASS_P (val))
42fd8198 1321 val = fold_convert (TREE_TYPE (type), val);
418b7df3
RG
1322 else
1323 {
b731b390 1324 new_temp = make_ssa_name (TREE_TYPE (type));
e412ece4
RB
1325 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1326 init_stmt = gimple_build_assign (new_temp,
1327 fold_build1 (VIEW_CONVERT_EXPR,
1328 TREE_TYPE (type),
1329 val));
1330 else
1331 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
418b7df3 1332 vect_init_vector_1 (stmt, init_stmt, gsi);
5467ee52 1333 val = new_temp;
418b7df3
RG
1334 }
1335 }
5467ee52 1336 val = build_vector_from_val (type, val);
418b7df3
RG
1337 }
1338
0e22bb5a
RB
1339 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1340 init_stmt = gimple_build_assign (new_temp, val);
418b7df3 1341 vect_init_vector_1 (stmt, init_stmt, gsi);
0e22bb5a 1342 return new_temp;
ebfd146a
IR
1343}
1344
c83a894c 1345/* Function vect_get_vec_def_for_operand_1.
a70d6342 1346
c83a894c
AH
1347 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1348 DT that will be used in the vectorized stmt. */
ebfd146a
IR
1349
1350tree
c83a894c 1351vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
ebfd146a
IR
1352{
1353 tree vec_oprnd;
355fe088 1354 gimple *vec_stmt;
ebfd146a 1355 stmt_vec_info def_stmt_info = NULL;
ebfd146a
IR
1356
1357 switch (dt)
1358 {
81c40241 1359 /* operand is a constant or a loop invariant. */
ebfd146a 1360 case vect_constant_def:
81c40241 1361 case vect_external_def:
c83a894c
AH
1362 /* Code should use vect_get_vec_def_for_operand. */
1363 gcc_unreachable ();
ebfd146a 1364
81c40241 1365 /* operand is defined inside the loop. */
8644a673 1366 case vect_internal_def:
ebfd146a 1367 {
ebfd146a
IR
1368 /* Get the def from the vectorized stmt. */
1369 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1370
ebfd146a 1371 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1372 /* Get vectorized pattern statement. */
1373 if (!vec_stmt
1374 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1375 && !STMT_VINFO_RELEVANT (def_stmt_info))
1376 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1377 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1378 gcc_assert (vec_stmt);
1379 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1380 vec_oprnd = PHI_RESULT (vec_stmt);
1381 else if (is_gimple_call (vec_stmt))
1382 vec_oprnd = gimple_call_lhs (vec_stmt);
1383 else
1384 vec_oprnd = gimple_assign_lhs (vec_stmt);
1385 return vec_oprnd;
1386 }
1387
c78e3652 1388 /* operand is defined by a loop header phi. */
ebfd146a 1389 case vect_reduction_def:
06066f92 1390 case vect_double_reduction_def:
7c5222ff 1391 case vect_nested_cycle:
ebfd146a
IR
1392 case vect_induction_def:
1393 {
1394 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1395
1396 /* Get the def from the vectorized stmt. */
1397 def_stmt_info = vinfo_for_stmt (def_stmt);
1398 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1399 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1400 vec_oprnd = PHI_RESULT (vec_stmt);
1401 else
1402 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1403 return vec_oprnd;
1404 }
1405
1406 default:
1407 gcc_unreachable ();
1408 }
1409}
1410
1411
c83a894c
AH
1412/* Function vect_get_vec_def_for_operand.
1413
1414 OP is an operand in STMT. This function returns a (vector) def that will be
1415 used in the vectorized stmt for STMT.
1416
1417 In the case that OP is an SSA_NAME which is defined in the loop, then
1418 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1419
1420 In case OP is an invariant or constant, a new stmt that creates a vector def
1421 needs to be introduced. VECTYPE may be used to specify a required type for
1422 vector invariant. */
1423
1424tree
1425vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1426{
1427 gimple *def_stmt;
1428 enum vect_def_type dt;
1429 bool is_simple_use;
1430 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1431 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1432
1433 if (dump_enabled_p ())
1434 {
1435 dump_printf_loc (MSG_NOTE, vect_location,
1436 "vect_get_vec_def_for_operand: ");
1437 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1438 dump_printf (MSG_NOTE, "\n");
1439 }
1440
1441 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1442 gcc_assert (is_simple_use);
1443 if (def_stmt && dump_enabled_p ())
1444 {
1445 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1446 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1447 }
1448
1449 if (dt == vect_constant_def || dt == vect_external_def)
1450 {
1451 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1452 tree vector_type;
1453
1454 if (vectype)
1455 vector_type = vectype;
2568d8a1 1456 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1457 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1458 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1459 else
1460 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1461
1462 gcc_assert (vector_type);
1463 return vect_init_vector (stmt, op, vector_type, NULL);
1464 }
1465 else
1466 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1467}
1468
1469
ebfd146a
IR
1470/* Function vect_get_vec_def_for_stmt_copy
1471
ff802fa1 1472 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1473 vectorized stmt to be created (by the caller to this function) is a "copy"
1474 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1475 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1476 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1477 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1478 DT is the type of the vector def VEC_OPRND.
1479
1480 Context:
1481 In case the vectorization factor (VF) is bigger than the number
1482 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1483 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1484 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1485 smallest data-type determines the VF, and as a result, when vectorizing
1486 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1487 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1488 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1489 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1490 which VF=16 and nunits=4, so the number of copies required is 4):
1491
1492 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1493
ebfd146a
IR
1494 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1495 VS1.1: vx.1 = memref1 VS1.2
1496 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1497 VS1.3: vx.3 = memref3
ebfd146a
IR
1498
1499 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1500 VSnew.1: vz1 = vx.1 + ... VSnew.2
1501 VSnew.2: vz2 = vx.2 + ... VSnew.3
1502 VSnew.3: vz3 = vx.3 + ...
1503
1504 The vectorization of S1 is explained in vectorizable_load.
1505 The vectorization of S2:
b8698a0f
L
1506 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1507 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1508 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1509 returns the vector-def 'vx.0'.
1510
b8698a0f
L
1511 To create the remaining copies of the vector-stmt (VSnew.j), this
1512 function is called to get the relevant vector-def for each operand. It is
1513 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1514 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1515
b8698a0f
L
1516 For example, to obtain the vector-def 'vx.1' in order to create the
1517 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1518 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1519 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1520 and return its def ('vx.1').
1521 Overall, to create the above sequence this function will be called 3 times:
1522 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1523 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1524 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1525
1526tree
1527vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1528{
355fe088 1529 gimple *vec_stmt_for_operand;
ebfd146a
IR
1530 stmt_vec_info def_stmt_info;
1531
1532 /* Do nothing; can reuse same def. */
8644a673 1533 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1534 return vec_oprnd;
1535
1536 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1537 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1538 gcc_assert (def_stmt_info);
1539 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1540 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1541 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543 else
1544 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545 return vec_oprnd;
1546}
1547
1548
1549/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1550 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1551
c78e3652 1552void
b8698a0f 1553vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1554 vec<tree> *vec_oprnds0,
1555 vec<tree> *vec_oprnds1)
ebfd146a 1556{
9771b263 1557 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1558
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1560 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1561
9771b263 1562 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1563 {
9771b263 1564 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1565 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1566 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1567 }
1568}
1569
1570
c78e3652 1571/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1572
c78e3652 1573void
355fe088 1574vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1575 vec<tree> *vec_oprnds0,
1576 vec<tree> *vec_oprnds1,
306b0c92 1577 slp_tree slp_node)
ebfd146a
IR
1578{
1579 if (slp_node)
d092494c
IR
1580 {
1581 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1582 auto_vec<tree> ops (nops);
1583 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1584
9771b263 1585 ops.quick_push (op0);
d092494c 1586 if (op1)
9771b263 1587 ops.quick_push (op1);
d092494c 1588
306b0c92 1589 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1590
37b5ec8f 1591 *vec_oprnds0 = vec_defs[0];
d092494c 1592 if (op1)
37b5ec8f 1593 *vec_oprnds1 = vec_defs[1];
d092494c 1594 }
ebfd146a
IR
1595 else
1596 {
1597 tree vec_oprnd;
1598
9771b263 1599 vec_oprnds0->create (1);
81c40241 1600 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1601 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1602
1603 if (op1)
1604 {
9771b263 1605 vec_oprnds1->create (1);
81c40241 1606 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1607 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1608 }
1609 }
1610}
1611
1612
1613/* Function vect_finish_stmt_generation.
1614
1615 Insert a new stmt. */
1616
1617void
355fe088 1618vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1619 gimple_stmt_iterator *gsi)
1620{
1621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
310213d4 1622 vec_info *vinfo = stmt_info->vinfo;
ebfd146a
IR
1623
1624 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1625
54e8e2c3
RG
1626 if (!gsi_end_p (*gsi)
1627 && gimple_has_mem_ops (vec_stmt))
1628 {
355fe088 1629 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1630 tree vuse = gimple_vuse (at_stmt);
1631 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1632 {
1633 tree vdef = gimple_vdef (at_stmt);
1634 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1635 /* If we have an SSA vuse and insert a store, update virtual
1636 SSA form to avoid triggering the renamer. Do so only
1637 if we can easily see all uses - which is what almost always
1638 happens with the way vectorized stmts are inserted. */
1639 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1640 && ((is_gimple_assign (vec_stmt)
1641 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1642 || (is_gimple_call (vec_stmt)
1643 && !(gimple_call_flags (vec_stmt)
1644 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1645 {
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1649 }
1650 }
1651 }
ebfd146a
IR
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653
310213d4 1654 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
ebfd146a 1655
73fbfcad 1656 if (dump_enabled_p ())
ebfd146a 1657 {
78c60e3d
SS
1658 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1660 }
1661
ad885386 1662 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1663
1664 /* While EH edges will generally prevent vectorization, stmt might
1665 e.g. be in a must-not-throw region. Ensure newly created stmts
1666 that could throw are part of the same region. */
1667 int lp_nr = lookup_stmt_eh_lp (stmt);
1668 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1669 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1670}
1671
70439f0d
RS
1672/* We want to vectorize a call to combined function CFN with function
1673 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1674 as the types of all inputs. Check whether this is possible using
1675 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1676
70439f0d
RS
1677static internal_fn
1678vectorizable_internal_function (combined_fn cfn, tree fndecl,
1679 tree vectype_out, tree vectype_in)
ebfd146a 1680{
70439f0d
RS
1681 internal_fn ifn;
1682 if (internal_fn_p (cfn))
1683 ifn = as_internal_fn (cfn);
1684 else
1685 ifn = associated_internal_fn (fndecl);
1686 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1687 {
1688 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1689 if (info.vectorizable)
1690 {
1691 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1692 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1693 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1694 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1695 return ifn;
1696 }
1697 }
1698 return IFN_LAST;
ebfd146a
IR
1699}
1700
5ce9450f 1701
355fe088 1702static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1703 gimple_stmt_iterator *);
1704
62da9e14
RS
1705/* STMT is a non-strided load or store, meaning that it accesses
1706 elements with a known constant step. Return -1 if that step
1707 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1708
1709static int
1710compare_step_with_zero (gimple *stmt)
1711{
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1713 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1714 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1715 size_zero_node);
62da9e14
RS
1716}
1717
1718/* If the target supports a permute mask that reverses the elements in
1719 a vector of type VECTYPE, return that mask, otherwise return null. */
1720
1721static tree
1722perm_mask_for_reverse (tree vectype)
1723{
928686b1 1724 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 1725
d980067b
RS
1726 /* The encoding has a single stepped pattern. */
1727 vec_perm_builder sel (nunits, 1, 3);
928686b1 1728 for (int i = 0; i < 3; ++i)
908a1a16 1729 sel.quick_push (nunits - 1 - i);
62da9e14 1730
e3342de4
RS
1731 vec_perm_indices indices (sel, 1, nunits);
1732 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 1733 return NULL_TREE;
e3342de4 1734 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 1735}
5ce9450f 1736
2de001ee
RS
1737/* A subroutine of get_load_store_type, with a subset of the same
1738 arguments. Handle the case where STMT is part of a grouped load
1739 or store.
1740
1741 For stores, the statements in the group are all consecutive
1742 and there is no gap at the end. For loads, the statements in the
1743 group might not be consecutive; there can be gaps between statements
1744 as well as at the end. */
1745
1746static bool
1747get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1748 vec_load_store_type vls_type,
1749 vect_memory_access_type *memory_access_type)
1750{
1751 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1752 vec_info *vinfo = stmt_info->vinfo;
1753 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1754 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1755 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 1756 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
1757 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1758 bool single_element_p = (stmt == first_stmt
1759 && !GROUP_NEXT_ELEMENT (stmt_info));
1760 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 1761 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1762
1763 /* True if the vectorized statements would access beyond the last
1764 statement in the group. */
1765 bool overrun_p = false;
1766
1767 /* True if we can cope with such overrun by peeling for gaps, so that
1768 there is at least one final scalar iteration after the vector loop. */
1769 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1770
1771 /* There can only be a gap at the end of the group if the stride is
1772 known at compile time. */
1773 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1774
1775 /* Stores can't yet have gaps. */
1776 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1777
1778 if (slp)
1779 {
1780 if (STMT_VINFO_STRIDED_P (stmt_info))
1781 {
1782 /* Try to use consecutive accesses of GROUP_SIZE elements,
1783 separated by the stride, until we have a complete vector.
1784 Fall back to scalar accesses if that isn't possible. */
928686b1 1785 if (multiple_p (nunits, group_size))
2de001ee
RS
1786 *memory_access_type = VMAT_STRIDED_SLP;
1787 else
1788 *memory_access_type = VMAT_ELEMENTWISE;
1789 }
1790 else
1791 {
1792 overrun_p = loop_vinfo && gap != 0;
1793 if (overrun_p && vls_type != VLS_LOAD)
1794 {
1795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 "Grouped store with gaps requires"
1797 " non-consecutive accesses\n");
1798 return false;
1799 }
f702e7d4
RS
1800 /* An overrun is fine if the trailing elements are smaller
1801 than the alignment boundary B. Every vector access will
1802 be a multiple of B and so we are guaranteed to access a
1803 non-gap element in the same B-sized block. */
f9ef2c76 1804 if (overrun_p
f702e7d4
RS
1805 && gap < (vect_known_alignment_in_bytes (first_dr)
1806 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1807 overrun_p = false;
2de001ee
RS
1808 if (overrun_p && !can_overrun_p)
1809 {
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "Peeling for outer loop is not supported\n");
1813 return false;
1814 }
1815 *memory_access_type = VMAT_CONTIGUOUS;
1816 }
1817 }
1818 else
1819 {
1820 /* We can always handle this case using elementwise accesses,
1821 but see if something more efficient is available. */
1822 *memory_access_type = VMAT_ELEMENTWISE;
1823
1824 /* If there is a gap at the end of the group then these optimizations
1825 would access excess elements in the last iteration. */
1826 bool would_overrun_p = (gap != 0);
f702e7d4
RS
1827 /* An overrun is fine if the trailing elements are smaller than the
1828 alignment boundary B. Every vector access will be a multiple of B
1829 and so we are guaranteed to access a non-gap element in the
1830 same B-sized block. */
f9ef2c76 1831 if (would_overrun_p
f702e7d4
RS
1832 && gap < (vect_known_alignment_in_bytes (first_dr)
1833 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1834 would_overrun_p = false;
f702e7d4 1835
2de001ee 1836 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
1837 && (can_overrun_p || !would_overrun_p)
1838 && compare_step_with_zero (stmt) > 0)
2de001ee
RS
1839 {
1840 /* First try using LOAD/STORE_LANES. */
1841 if (vls_type == VLS_LOAD
1842 ? vect_load_lanes_supported (vectype, group_size)
1843 : vect_store_lanes_supported (vectype, group_size))
1844 {
1845 *memory_access_type = VMAT_LOAD_STORE_LANES;
1846 overrun_p = would_overrun_p;
1847 }
1848
1849 /* If that fails, try using permuting loads. */
1850 if (*memory_access_type == VMAT_ELEMENTWISE
1851 && (vls_type == VLS_LOAD
1852 ? vect_grouped_load_supported (vectype, single_element_p,
1853 group_size)
1854 : vect_grouped_store_supported (vectype, group_size)))
1855 {
1856 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1857 overrun_p = would_overrun_p;
1858 }
1859 }
1860 }
1861
1862 if (vls_type != VLS_LOAD && first_stmt == stmt)
1863 {
1864 /* STMT is the leader of the group. Check the operands of all the
1865 stmts of the group. */
1866 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1867 while (next_stmt)
1868 {
1869 gcc_assert (gimple_assign_single_p (next_stmt));
1870 tree op = gimple_assign_rhs1 (next_stmt);
1871 gimple *def_stmt;
1872 enum vect_def_type dt;
1873 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1874 {
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1877 "use not simple.\n");
1878 return false;
1879 }
1880 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1881 }
1882 }
1883
1884 if (overrun_p)
1885 {
1886 gcc_assert (can_overrun_p);
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1889 "Data access with gaps requires scalar "
1890 "epilogue loop\n");
1891 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1892 }
1893
1894 return true;
1895}
1896
62da9e14
RS
1897/* A subroutine of get_load_store_type, with a subset of the same
1898 arguments. Handle the case where STMT is a load or store that
1899 accesses consecutive elements with a negative step. */
1900
1901static vect_memory_access_type
1902get_negative_load_store_type (gimple *stmt, tree vectype,
1903 vec_load_store_type vls_type,
1904 unsigned int ncopies)
1905{
1906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1907 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1908 dr_alignment_support alignment_support_scheme;
1909
1910 if (ncopies > 1)
1911 {
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1914 "multiple types with negative step.\n");
1915 return VMAT_ELEMENTWISE;
1916 }
1917
1918 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1919 if (alignment_support_scheme != dr_aligned
1920 && alignment_support_scheme != dr_unaligned_supported)
1921 {
1922 if (dump_enabled_p ())
1923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1924 "negative step but alignment required.\n");
1925 return VMAT_ELEMENTWISE;
1926 }
1927
1928 if (vls_type == VLS_STORE_INVARIANT)
1929 {
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_NOTE, vect_location,
1932 "negative step with invariant source;"
1933 " no permute needed.\n");
1934 return VMAT_CONTIGUOUS_DOWN;
1935 }
1936
1937 if (!perm_mask_for_reverse (vectype))
1938 {
1939 if (dump_enabled_p ())
1940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941 "negative step and reversing not supported.\n");
1942 return VMAT_ELEMENTWISE;
1943 }
1944
1945 return VMAT_CONTIGUOUS_REVERSE;
1946}
1947
2de001ee
RS
1948/* Analyze load or store statement STMT of type VLS_TYPE. Return true
1949 if there is a memory access type that the vectorized form can use,
1950 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1951 or scatters, fill in GS_INFO accordingly.
1952
1953 SLP says whether we're performing SLP rather than loop vectorization.
62da9e14
RS
1954 VECTYPE is the vector type that the vectorized statements will use.
1955 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
1956
1957static bool
1958get_load_store_type (gimple *stmt, tree vectype, bool slp,
62da9e14 1959 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
1960 vect_memory_access_type *memory_access_type,
1961 gather_scatter_info *gs_info)
1962{
1963 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1964 vec_info *vinfo = stmt_info->vinfo;
1965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 1966 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1967 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1968 {
1969 *memory_access_type = VMAT_GATHER_SCATTER;
1970 gimple *def_stmt;
1971 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1972 gcc_unreachable ();
1973 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1974 &gs_info->offset_dt,
1975 &gs_info->offset_vectype))
1976 {
1977 if (dump_enabled_p ())
1978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1979 "%s index use not simple.\n",
1980 vls_type == VLS_LOAD ? "gather" : "scatter");
1981 return false;
1982 }
1983 }
1984 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1985 {
1986 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1987 memory_access_type))
1988 return false;
1989 }
1990 else if (STMT_VINFO_STRIDED_P (stmt_info))
1991 {
1992 gcc_assert (!slp);
1993 *memory_access_type = VMAT_ELEMENTWISE;
1994 }
1995 else
62da9e14
RS
1996 {
1997 int cmp = compare_step_with_zero (stmt);
1998 if (cmp < 0)
1999 *memory_access_type = get_negative_load_store_type
2000 (stmt, vectype, vls_type, ncopies);
2001 else if (cmp == 0)
2002 {
2003 gcc_assert (vls_type == VLS_LOAD);
2004 *memory_access_type = VMAT_INVARIANT;
2005 }
2006 else
2007 *memory_access_type = VMAT_CONTIGUOUS;
2008 }
2de001ee 2009
4d694b27
RS
2010 if ((*memory_access_type == VMAT_ELEMENTWISE
2011 || *memory_access_type == VMAT_STRIDED_SLP)
2012 && !nunits.is_constant ())
2013 {
2014 if (dump_enabled_p ())
2015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2016 "Not using elementwise accesses due to variable "
2017 "vectorization factor.\n");
2018 return false;
2019 }
2020
2de001ee
RS
2021 /* FIXME: At the moment the cost model seems to underestimate the
2022 cost of using elementwise accesses. This check preserves the
2023 traditional behavior until that can be fixed. */
2024 if (*memory_access_type == VMAT_ELEMENTWISE
2025 && !STMT_VINFO_STRIDED_P (stmt_info))
2026 {
2027 if (dump_enabled_p ())
2028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2029 "not falling back to elementwise accesses\n");
2030 return false;
2031 }
2032 return true;
2033}
2034
5ce9450f
JJ
2035/* Function vectorizable_mask_load_store.
2036
2037 Check if STMT performs a conditional load or store that can be vectorized.
2038 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2039 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2040 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2041
2042static bool
355fe088
TS
2043vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2044 gimple **vec_stmt, slp_tree slp_node)
5ce9450f
JJ
2045{
2046 tree vec_dest = NULL;
2047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2048 stmt_vec_info prev_stmt_info;
2049 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2050 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2051 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2052 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2053 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
57e2f6ad 2054 tree rhs_vectype = NULL_TREE;
045c1278 2055 tree mask_vectype;
5ce9450f 2056 tree elem_type;
355fe088 2057 gimple *new_stmt;
5ce9450f
JJ
2058 tree dummy;
2059 tree dataref_ptr = NULL_TREE;
355fe088 2060 gimple *ptr_incr;
4d694b27 2061 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5ce9450f
JJ
2062 int ncopies;
2063 int i, j;
2064 bool inv_p;
134c85ca 2065 gather_scatter_info gs_info;
2de001ee 2066 vec_load_store_type vls_type;
5ce9450f 2067 tree mask;
355fe088 2068 gimple *def_stmt;
5ce9450f
JJ
2069 enum vect_def_type dt;
2070
2071 if (slp_node != NULL)
2072 return false;
2073
e8f142e2 2074 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5ce9450f
JJ
2075 gcc_assert (ncopies >= 1);
2076
5ce9450f 2077 mask = gimple_call_arg (stmt, 2);
045c1278 2078
2568d8a1 2079 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
5ce9450f
JJ
2080 return false;
2081
2082 /* FORNOW. This restriction should be relaxed. */
2083 if (nested_in_vect_loop && ncopies > 1)
2084 {
2085 if (dump_enabled_p ())
2086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2087 "multiple types in nested loop.");
2088 return false;
2089 }
2090
2091 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2092 return false;
2093
66c16fd9
RB
2094 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2095 && ! vec_stmt)
5ce9450f
JJ
2096 return false;
2097
2098 if (!STMT_VINFO_DATA_REF (stmt_info))
2099 return false;
2100
2101 elem_type = TREE_TYPE (vectype);
2102
045c1278
IE
2103 if (TREE_CODE (mask) != SSA_NAME)
2104 return false;
2105
2106 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2107 return false;
2108
2109 if (!mask_vectype)
2110 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2111
dc6a3147 2112 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
928686b1
RS
2113 || maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2114 TYPE_VECTOR_SUBPARTS (vectype)))
045c1278
IE
2115 return false;
2116
2de001ee 2117 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
57e2f6ad
IE
2118 {
2119 tree rhs = gimple_call_arg (stmt, 3);
2120 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2121 return false;
2de001ee
RS
2122 if (dt == vect_constant_def || dt == vect_external_def)
2123 vls_type = VLS_STORE_INVARIANT;
2124 else
2125 vls_type = VLS_STORE;
57e2f6ad 2126 }
2de001ee
RS
2127 else
2128 vls_type = VLS_LOAD;
57e2f6ad 2129
2de001ee 2130 vect_memory_access_type memory_access_type;
62da9e14 2131 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2de001ee
RS
2132 &memory_access_type, &gs_info))
2133 return false;
03b9e8e4 2134
2de001ee
RS
2135 if (memory_access_type == VMAT_GATHER_SCATTER)
2136 {
134c85ca 2137 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
03b9e8e4
JJ
2138 tree masktype
2139 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2140 if (TREE_CODE (masktype) == INTEGER_TYPE)
2141 {
2142 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2144 "masked gather with integer mask not supported.");
2145 return false;
2146 }
5ce9450f 2147 }
2de001ee
RS
2148 else if (memory_access_type != VMAT_CONTIGUOUS)
2149 {
2150 if (dump_enabled_p ())
2151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2152 "unsupported access type for masked %s.\n",
2153 vls_type == VLS_LOAD ? "load" : "store");
2154 return false;
2155 }
5ce9450f 2156 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
045c1278
IE
2157 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2158 TYPE_MODE (mask_vectype),
2de001ee 2159 vls_type == VLS_LOAD)
57e2f6ad
IE
2160 || (rhs_vectype
2161 && !useless_type_conversion_p (vectype, rhs_vectype)))
5ce9450f
JJ
2162 return false;
2163
5ce9450f
JJ
2164 if (!vec_stmt) /* transformation not required. */
2165 {
2de001ee 2166 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5ce9450f 2167 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2de001ee
RS
2168 if (vls_type == VLS_LOAD)
2169 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2170 NULL, NULL, NULL);
5ce9450f 2171 else
2de001ee
RS
2172 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2173 dt, NULL, NULL, NULL);
5ce9450f
JJ
2174 return true;
2175 }
2de001ee 2176 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5ce9450f 2177
67b8dbac 2178 /* Transform. */
5ce9450f 2179
2de001ee 2180 if (memory_access_type == VMAT_GATHER_SCATTER)
5ce9450f
JJ
2181 {
2182 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 2183 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5ce9450f 2184 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 2185 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 2186 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 2187 tree mask_perm_mask = NULL_TREE;
5ce9450f
JJ
2188 edge pe = loop_preheader_edge (loop);
2189 gimple_seq seq;
2190 basic_block new_bb;
2191 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
2192 poly_uint64 gather_off_nunits
2193 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5ce9450f 2194
134c85ca 2195 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
acdcd61b
JJ
2196 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2197 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2198 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2199 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2200 scaletype = TREE_VALUE (arglist);
2201 gcc_checking_assert (types_compatible_p (srctype, rettype)
2202 && types_compatible_p (srctype, masktype));
2203
4d694b27 2204 if (known_eq (nunits, gather_off_nunits))
5ce9450f 2205 modifier = NONE;
4d694b27 2206 else if (known_eq (nunits * 2, gather_off_nunits))
5ce9450f 2207 {
5ce9450f
JJ
2208 modifier = WIDEN;
2209
4d694b27
RS
2210 /* Currently widening gathers and scatters are only supported for
2211 fixed-length vectors. */
2212 int count = gather_off_nunits.to_constant ();
2213 vec_perm_builder sel (count, count, 1);
2214 for (i = 0; i < count; ++i)
2215 sel.quick_push (i | (count / 2));
5ce9450f 2216
4d694b27 2217 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
2218 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2219 indices);
5ce9450f 2220 }
4d694b27 2221 else if (known_eq (nunits, gather_off_nunits * 2))
5ce9450f 2222 {
5ce9450f
JJ
2223 modifier = NARROW;
2224
4d694b27
RS
2225 /* Currently narrowing gathers and scatters are only supported for
2226 fixed-length vectors. */
2227 int count = nunits.to_constant ();
2228 vec_perm_builder sel (count, count, 1);
2229 sel.quick_grow (count);
2230 for (i = 0; i < count; ++i)
2231 sel[i] = i < count / 2 ? i : i + count / 2;
2232 vec_perm_indices indices (sel, 2, count);
e3342de4 2233 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5ce9450f 2234
5ce9450f 2235 ncopies *= 2;
4d694b27
RS
2236 for (i = 0; i < count; ++i)
2237 sel[i] = i | (count / 2);
2238 indices.new_vector (sel, 2, count);
e3342de4 2239 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
5ce9450f
JJ
2240 }
2241 else
2242 gcc_unreachable ();
2243
5ce9450f
JJ
2244 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2245
134c85ca 2246 ptr = fold_convert (ptrtype, gs_info.base);
5ce9450f
JJ
2247 if (!is_gimple_min_invariant (ptr))
2248 {
2249 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2250 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2251 gcc_assert (!new_bb);
2252 }
2253
134c85ca 2254 scale = build_int_cst (scaletype, gs_info.scale);
5ce9450f
JJ
2255
2256 prev_stmt_info = NULL;
2257 for (j = 0; j < ncopies; ++j)
2258 {
2259 if (modifier == WIDEN && (j & 1))
2260 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2261 perm_mask, stmt, gsi);
2262 else if (j == 0)
2263 op = vec_oprnd0
134c85ca 2264 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5ce9450f
JJ
2265 else
2266 op = vec_oprnd0
134c85ca 2267 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
5ce9450f
JJ
2268
2269 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2270 {
928686b1
RS
2271 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2272 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 2273 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5ce9450f
JJ
2274 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2275 new_stmt
0d0e4a03 2276 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2277 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2278 op = var;
2279 }
2280
acdcd61b
JJ
2281 if (mask_perm_mask && (j & 1))
2282 mask_op = permute_vec_elements (mask_op, mask_op,
2283 mask_perm_mask, stmt, gsi);
5ce9450f
JJ
2284 else
2285 {
acdcd61b 2286 if (j == 0)
81c40241 2287 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
acdcd61b
JJ
2288 else
2289 {
81c40241 2290 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
acdcd61b
JJ
2291 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2292 }
5ce9450f 2293
acdcd61b
JJ
2294 mask_op = vec_mask;
2295 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2296 {
928686b1
RS
2297 gcc_assert
2298 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2299 TYPE_VECTOR_SUBPARTS (masktype)));
0e22bb5a 2300 var = vect_get_new_ssa_name (masktype, vect_simple_var);
acdcd61b
JJ
2301 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2302 new_stmt
0d0e4a03 2303 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
acdcd61b
JJ
2304 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2305 mask_op = var;
2306 }
5ce9450f
JJ
2307 }
2308
2309 new_stmt
134c85ca 2310 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
5ce9450f
JJ
2311 scale);
2312
2313 if (!useless_type_conversion_p (vectype, rettype))
2314 {
928686b1
RS
2315 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2316 TYPE_VECTOR_SUBPARTS (rettype)));
0e22bb5a 2317 op = vect_get_new_ssa_name (rettype, vect_simple_var);
5ce9450f
JJ
2318 gimple_call_set_lhs (new_stmt, op);
2319 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 2320 var = make_ssa_name (vec_dest);
5ce9450f 2321 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
0d0e4a03 2322 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2323 }
2324 else
2325 {
2326 var = make_ssa_name (vec_dest, new_stmt);
2327 gimple_call_set_lhs (new_stmt, var);
2328 }
2329
2330 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2331
2332 if (modifier == NARROW)
2333 {
2334 if ((j & 1) == 0)
2335 {
2336 prev_res = var;
2337 continue;
2338 }
2339 var = permute_vec_elements (prev_res, var,
2340 perm_mask, stmt, gsi);
2341 new_stmt = SSA_NAME_DEF_STMT (var);
2342 }
2343
2344 if (prev_stmt_info == NULL)
2345 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2346 else
2347 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2348 prev_stmt_info = vinfo_for_stmt (new_stmt);
2349 }
3efe2e2c
JJ
2350
2351 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2352 from the IL. */
e6f5c25d
IE
2353 if (STMT_VINFO_RELATED_STMT (stmt_info))
2354 {
2355 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2356 stmt_info = vinfo_for_stmt (stmt);
2357 }
3efe2e2c
JJ
2358 tree lhs = gimple_call_lhs (stmt);
2359 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2360 set_vinfo_for_stmt (new_stmt, stmt_info);
2361 set_vinfo_for_stmt (stmt, NULL);
2362 STMT_VINFO_STMT (stmt_info) = new_stmt;
2363 gsi_replace (gsi, new_stmt, true);
5ce9450f
JJ
2364 return true;
2365 }
2de001ee 2366 else if (vls_type != VLS_LOAD)
5ce9450f
JJ
2367 {
2368 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2369 prev_stmt_info = NULL;
2d4dc223 2370 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
5ce9450f
JJ
2371 for (i = 0; i < ncopies; i++)
2372 {
2373 unsigned align, misalign;
2374
2375 if (i == 0)
2376 {
2377 tree rhs = gimple_call_arg (stmt, 3);
81c40241 2378 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
7251b0bf
RS
2379 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2380 mask_vectype);
5ce9450f
JJ
2381 /* We should have catched mismatched types earlier. */
2382 gcc_assert (useless_type_conversion_p (vectype,
2383 TREE_TYPE (vec_rhs)));
2384 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2385 NULL_TREE, &dummy, gsi,
2386 &ptr_incr, false, &inv_p);
2387 gcc_assert (!inv_p);
2388 }
2389 else
2390 {
81c40241 2391 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
5ce9450f 2392 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
81c40241 2393 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2394 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2395 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2396 TYPE_SIZE_UNIT (vectype));
2397 }
2398
f702e7d4 2399 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2400 if (aligned_access_p (dr))
2401 misalign = 0;
2402 else if (DR_MISALIGNMENT (dr) == -1)
2403 {
2404 align = TYPE_ALIGN_UNIT (elem_type);
2405 misalign = 0;
2406 }
2407 else
2408 misalign = DR_MISALIGNMENT (dr);
2409 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2410 misalign);
08554c26 2411 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2412 misalign ? least_bit_hwi (misalign) : align);
a844293d 2413 gcall *call
5ce9450f 2414 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
08554c26 2415 ptr, vec_mask, vec_rhs);
a844293d
RS
2416 gimple_call_set_nothrow (call, true);
2417 new_stmt = call;
5ce9450f
JJ
2418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2419 if (i == 0)
2420 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2421 else
2422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2423 prev_stmt_info = vinfo_for_stmt (new_stmt);
2424 }
2425 }
2426 else
2427 {
2428 tree vec_mask = NULL_TREE;
2429 prev_stmt_info = NULL;
2430 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2431 for (i = 0; i < ncopies; i++)
2432 {
2433 unsigned align, misalign;
2434
2435 if (i == 0)
2436 {
7251b0bf
RS
2437 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2438 mask_vectype);
5ce9450f
JJ
2439 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2440 NULL_TREE, &dummy, gsi,
2441 &ptr_incr, false, &inv_p);
2442 gcc_assert (!inv_p);
2443 }
2444 else
2445 {
81c40241 2446 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2447 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2448 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2449 TYPE_SIZE_UNIT (vectype));
2450 }
2451
f702e7d4 2452 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2453 if (aligned_access_p (dr))
2454 misalign = 0;
2455 else if (DR_MISALIGNMENT (dr) == -1)
2456 {
2457 align = TYPE_ALIGN_UNIT (elem_type);
2458 misalign = 0;
2459 }
2460 else
2461 misalign = DR_MISALIGNMENT (dr);
2462 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2463 misalign);
08554c26 2464 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2465 misalign ? least_bit_hwi (misalign) : align);
a844293d 2466 gcall *call
5ce9450f 2467 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
08554c26 2468 ptr, vec_mask);
a844293d
RS
2469 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2470 gimple_call_set_nothrow (call, true);
2471 vect_finish_stmt_generation (stmt, call, gsi);
5ce9450f 2472 if (i == 0)
a844293d 2473 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
5ce9450f 2474 else
a844293d
RS
2475 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2476 prev_stmt_info = vinfo_for_stmt (call);
5ce9450f
JJ
2477 }
2478 }
2479
2de001ee 2480 if (vls_type == VLS_LOAD)
3efe2e2c
JJ
2481 {
2482 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2483 from the IL. */
e6f5c25d
IE
2484 if (STMT_VINFO_RELATED_STMT (stmt_info))
2485 {
2486 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2487 stmt_info = vinfo_for_stmt (stmt);
2488 }
3efe2e2c
JJ
2489 tree lhs = gimple_call_lhs (stmt);
2490 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2491 set_vinfo_for_stmt (new_stmt, stmt_info);
2492 set_vinfo_for_stmt (stmt, NULL);
2493 STMT_VINFO_STMT (stmt_info) = new_stmt;
2494 gsi_replace (gsi, new_stmt, true);
2495 }
2496
5ce9450f
JJ
2497 return true;
2498}
2499
37b14185
RB
2500/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2501
2502static bool
2503vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2504 gimple **vec_stmt, slp_tree slp_node,
2505 tree vectype_in, enum vect_def_type *dt)
2506{
2507 tree op, vectype;
2508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2509 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2510 unsigned ncopies;
2511 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2512
2513 op = gimple_call_arg (stmt, 0);
2514 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2515
2516 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2517 return false;
37b14185
RB
2518
2519 /* Multiple types in SLP are handled by creating the appropriate number of
2520 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2521 case of SLP. */
2522 if (slp_node)
2523 ncopies = 1;
2524 else
e8f142e2 2525 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2526
2527 gcc_assert (ncopies >= 1);
2528
2529 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2530 if (! char_vectype)
2531 return false;
2532
928686b1
RS
2533 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2534 return false;
2535
794e3180 2536 unsigned word_bytes = num_bytes / nunits;
908a1a16 2537
d980067b
RS
2538 /* The encoding uses one stepped pattern for each byte in the word. */
2539 vec_perm_builder elts (num_bytes, word_bytes, 3);
2540 for (unsigned i = 0; i < 3; ++i)
37b14185 2541 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2542 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2543
e3342de4
RS
2544 vec_perm_indices indices (elts, 1, num_bytes);
2545 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2546 return false;
2547
2548 if (! vec_stmt)
2549 {
2550 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2553 "\n");
2554 if (! PURE_SLP_STMT (stmt_info))
2555 {
2556 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2557 1, vector_stmt, stmt_info, 0, vect_prologue);
2558 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2559 ncopies, vec_perm, stmt_info, 0, vect_body);
2560 }
2561 return true;
2562 }
2563
736d0f28 2564 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
2565
2566 /* Transform. */
2567 vec<tree> vec_oprnds = vNULL;
2568 gimple *new_stmt = NULL;
2569 stmt_vec_info prev_stmt_info = NULL;
2570 for (unsigned j = 0; j < ncopies; j++)
2571 {
2572 /* Handle uses. */
2573 if (j == 0)
306b0c92 2574 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2575 else
2576 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2577
2578 /* Arguments are ready. create the new vector stmt. */
2579 unsigned i;
2580 tree vop;
2581 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2582 {
2583 tree tem = make_ssa_name (char_vectype);
2584 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2585 char_vectype, vop));
2586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2587 tree tem2 = make_ssa_name (char_vectype);
2588 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2589 tem, tem, bswap_vconst);
2590 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2591 tem = make_ssa_name (vectype);
2592 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2593 vectype, tem2));
2594 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2595 if (slp_node)
2596 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2597 }
2598
2599 if (slp_node)
2600 continue;
2601
2602 if (j == 0)
2603 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2604 else
2605 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2606
2607 prev_stmt_info = vinfo_for_stmt (new_stmt);
2608 }
2609
2610 vec_oprnds.release ();
2611 return true;
2612}
2613
b1b6836e
RS
2614/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2615 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2616 in a single step. On success, store the binary pack code in
2617 *CONVERT_CODE. */
2618
2619static bool
2620simple_integer_narrowing (tree vectype_out, tree vectype_in,
2621 tree_code *convert_code)
2622{
2623 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2624 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2625 return false;
2626
2627 tree_code code;
2628 int multi_step_cvt = 0;
2629 auto_vec <tree, 8> interm_types;
2630 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2631 &code, &multi_step_cvt,
2632 &interm_types)
2633 || multi_step_cvt)
2634 return false;
2635
2636 *convert_code = code;
2637 return true;
2638}
5ce9450f 2639
ebfd146a
IR
2640/* Function vectorizable_call.
2641
538dd0b7 2642 Check if GS performs a function call that can be vectorized.
b8698a0f 2643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2646
2647static bool
355fe088 2648vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2649 slp_tree slp_node)
ebfd146a 2650{
538dd0b7 2651 gcall *stmt;
ebfd146a
IR
2652 tree vec_dest;
2653 tree scalar_dest;
2654 tree op, type;
2655 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2656 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 2657 tree vectype_out, vectype_in;
c7bda0f4
RS
2658 poly_uint64 nunits_in;
2659 poly_uint64 nunits_out;
ebfd146a 2660 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2661 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 2662 vec_info *vinfo = stmt_info->vinfo;
81c40241 2663 tree fndecl, new_temp, rhs_type;
355fe088 2664 gimple *def_stmt;
0502fb85
UB
2665 enum vect_def_type dt[3]
2666 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 2667 int ndts = 3;
355fe088 2668 gimple *new_stmt = NULL;
ebfd146a 2669 int ncopies, j;
6e1aa848 2670 vec<tree> vargs = vNULL;
ebfd146a
IR
2671 enum { NARROW, NONE, WIDEN } modifier;
2672 size_t i, nargs;
9d5e7640 2673 tree lhs;
ebfd146a 2674
190c2236 2675 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2676 return false;
2677
66c16fd9
RB
2678 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2679 && ! vec_stmt)
ebfd146a
IR
2680 return false;
2681
538dd0b7
DM
2682 /* Is GS a vectorizable call? */
2683 stmt = dyn_cast <gcall *> (gs);
2684 if (!stmt)
ebfd146a
IR
2685 return false;
2686
5ce9450f
JJ
2687 if (gimple_call_internal_p (stmt)
2688 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2689 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2690 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2691 slp_node);
2692
0136f8f0
AH
2693 if (gimple_call_lhs (stmt) == NULL_TREE
2694 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2695 return false;
2696
0136f8f0 2697 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2698
b690cc0f
RG
2699 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2700
ebfd146a
IR
2701 /* Process function arguments. */
2702 rhs_type = NULL_TREE;
b690cc0f 2703 vectype_in = NULL_TREE;
ebfd146a
IR
2704 nargs = gimple_call_num_args (stmt);
2705
1b1562a5
MM
2706 /* Bail out if the function has more than three arguments, we do not have
2707 interesting builtin functions to vectorize with more than two arguments
2708 except for fma. No arguments is also not good. */
2709 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2710 return false;
2711
74bf76ed
JJ
2712 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2713 if (gimple_call_internal_p (stmt)
2714 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2715 {
2716 nargs = 0;
2717 rhs_type = unsigned_type_node;
2718 }
2719
ebfd146a
IR
2720 for (i = 0; i < nargs; i++)
2721 {
b690cc0f
RG
2722 tree opvectype;
2723
ebfd146a
IR
2724 op = gimple_call_arg (stmt, i);
2725
2726 /* We can only handle calls with arguments of the same type. */
2727 if (rhs_type
8533c9d8 2728 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2729 {
73fbfcad 2730 if (dump_enabled_p ())
78c60e3d 2731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2732 "argument types differ.\n");
ebfd146a
IR
2733 return false;
2734 }
b690cc0f
RG
2735 if (!rhs_type)
2736 rhs_type = TREE_TYPE (op);
ebfd146a 2737
81c40241 2738 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 2739 {
73fbfcad 2740 if (dump_enabled_p ())
78c60e3d 2741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2742 "use not simple.\n");
ebfd146a
IR
2743 return false;
2744 }
ebfd146a 2745
b690cc0f
RG
2746 if (!vectype_in)
2747 vectype_in = opvectype;
2748 else if (opvectype
2749 && opvectype != vectype_in)
2750 {
73fbfcad 2751 if (dump_enabled_p ())
78c60e3d 2752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2753 "argument vector types differ.\n");
b690cc0f
RG
2754 return false;
2755 }
2756 }
2757 /* If all arguments are external or constant defs use a vector type with
2758 the same size as the output vector type. */
ebfd146a 2759 if (!vectype_in)
b690cc0f 2760 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2761 if (vec_stmt)
2762 gcc_assert (vectype_in);
2763 if (!vectype_in)
2764 {
73fbfcad 2765 if (dump_enabled_p ())
7d8930a0 2766 {
78c60e3d
SS
2767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2768 "no vectype for scalar type ");
2769 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2770 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2771 }
2772
2773 return false;
2774 }
ebfd146a
IR
2775
2776 /* FORNOW */
b690cc0f
RG
2777 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2778 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 2779 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 2780 modifier = NARROW;
c7bda0f4 2781 else if (known_eq (nunits_out, nunits_in))
ebfd146a 2782 modifier = NONE;
c7bda0f4 2783 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
2784 modifier = WIDEN;
2785 else
2786 return false;
2787
70439f0d
RS
2788 /* We only handle functions that do not read or clobber memory. */
2789 if (gimple_vuse (stmt))
2790 {
2791 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2793 "function reads from or writes to memory.\n");
2794 return false;
2795 }
2796
ebfd146a
IR
2797 /* For now, we only vectorize functions if a target specific builtin
2798 is available. TODO -- in some cases, it might be profitable to
2799 insert the calls for pieces of the vector, in order to be able
2800 to vectorize other operations in the loop. */
70439f0d
RS
2801 fndecl = NULL_TREE;
2802 internal_fn ifn = IFN_LAST;
2803 combined_fn cfn = gimple_call_combined_fn (stmt);
2804 tree callee = gimple_call_fndecl (stmt);
2805
2806 /* First try using an internal function. */
b1b6836e
RS
2807 tree_code convert_code = ERROR_MARK;
2808 if (cfn != CFN_LAST
2809 && (modifier == NONE
2810 || (modifier == NARROW
2811 && simple_integer_narrowing (vectype_out, vectype_in,
2812 &convert_code))))
70439f0d
RS
2813 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2814 vectype_in);
2815
2816 /* If that fails, try asking for a target-specific built-in function. */
2817 if (ifn == IFN_LAST)
2818 {
2819 if (cfn != CFN_LAST)
2820 fndecl = targetm.vectorize.builtin_vectorized_function
2821 (cfn, vectype_out, vectype_in);
2822 else
2823 fndecl = targetm.vectorize.builtin_md_vectorized_function
2824 (callee, vectype_out, vectype_in);
2825 }
2826
2827 if (ifn == IFN_LAST && !fndecl)
ebfd146a 2828 {
70439f0d 2829 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
2830 && !slp_node
2831 && loop_vinfo
2832 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2833 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2834 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2835 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2836 {
2837 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2838 { 0, 1, 2, ... vf - 1 } vector. */
2839 gcc_assert (nargs == 0);
2840 }
37b14185
RB
2841 else if (modifier == NONE
2842 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2843 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2844 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2845 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2846 vectype_in, dt);
74bf76ed
JJ
2847 else
2848 {
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2851 "function is not vectorizable.\n");
74bf76ed
JJ
2852 return false;
2853 }
ebfd146a
IR
2854 }
2855
fce57248 2856 if (slp_node)
190c2236 2857 ncopies = 1;
b1b6836e 2858 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 2859 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 2860 else
e8f142e2 2861 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
2862
2863 /* Sanity check: make sure that at least one copy of the vectorized stmt
2864 needs to be generated. */
2865 gcc_assert (ncopies >= 1);
2866
2867 if (!vec_stmt) /* transformation not required. */
2868 {
2869 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2870 if (dump_enabled_p ())
e645e942
TJ
2871 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2872 "\n");
4fc5ebf1 2873 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
b1b6836e
RS
2874 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2875 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2876 vec_promote_demote, stmt_info, 0, vect_body);
2877
ebfd146a
IR
2878 return true;
2879 }
2880
67b8dbac 2881 /* Transform. */
ebfd146a 2882
73fbfcad 2883 if (dump_enabled_p ())
e645e942 2884 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2885
2886 /* Handle def. */
2887 scalar_dest = gimple_call_lhs (stmt);
2888 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2889
2890 prev_stmt_info = NULL;
b1b6836e 2891 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 2892 {
b1b6836e 2893 tree prev_res = NULL_TREE;
ebfd146a
IR
2894 for (j = 0; j < ncopies; ++j)
2895 {
2896 /* Build argument list for the vectorized call. */
2897 if (j == 0)
9771b263 2898 vargs.create (nargs);
ebfd146a 2899 else
9771b263 2900 vargs.truncate (0);
ebfd146a 2901
190c2236
JJ
2902 if (slp_node)
2903 {
ef062b13 2904 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2905 vec<tree> vec_oprnds0;
190c2236
JJ
2906
2907 for (i = 0; i < nargs; i++)
9771b263 2908 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2909 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2910 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2911
2912 /* Arguments are ready. Create the new vector stmt. */
9771b263 2913 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2914 {
2915 size_t k;
2916 for (k = 0; k < nargs; k++)
2917 {
37b5ec8f 2918 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2919 vargs[k] = vec_oprndsk[i];
190c2236 2920 }
b1b6836e
RS
2921 if (modifier == NARROW)
2922 {
2923 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2924 gcall *call
2925 = gimple_build_call_internal_vec (ifn, vargs);
2926 gimple_call_set_lhs (call, half_res);
2927 gimple_call_set_nothrow (call, true);
2928 new_stmt = call;
b1b6836e
RS
2929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2930 if ((i & 1) == 0)
2931 {
2932 prev_res = half_res;
2933 continue;
2934 }
2935 new_temp = make_ssa_name (vec_dest);
2936 new_stmt = gimple_build_assign (new_temp, convert_code,
2937 prev_res, half_res);
2938 }
70439f0d 2939 else
b1b6836e 2940 {
a844293d 2941 gcall *call;
b1b6836e 2942 if (ifn != IFN_LAST)
a844293d 2943 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 2944 else
a844293d
RS
2945 call = gimple_build_call_vec (fndecl, vargs);
2946 new_temp = make_ssa_name (vec_dest, call);
2947 gimple_call_set_lhs (call, new_temp);
2948 gimple_call_set_nothrow (call, true);
2949 new_stmt = call;
b1b6836e 2950 }
190c2236 2951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2952 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2953 }
2954
2955 for (i = 0; i < nargs; i++)
2956 {
37b5ec8f 2957 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2958 vec_oprndsi.release ();
190c2236 2959 }
190c2236
JJ
2960 continue;
2961 }
2962
ebfd146a
IR
2963 for (i = 0; i < nargs; i++)
2964 {
2965 op = gimple_call_arg (stmt, i);
2966 if (j == 0)
2967 vec_oprnd0
81c40241 2968 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 2969 else
63827fb8
IR
2970 {
2971 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2972 vec_oprnd0
2973 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2974 }
ebfd146a 2975
9771b263 2976 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2977 }
2978
74bf76ed
JJ
2979 if (gimple_call_internal_p (stmt)
2980 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2981 {
c7bda0f4 2982 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 2983 tree new_var
0e22bb5a 2984 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 2985 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 2986 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2987 new_temp = make_ssa_name (vec_dest);
0e22bb5a 2988 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 2989 }
b1b6836e
RS
2990 else if (modifier == NARROW)
2991 {
2992 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2993 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2994 gimple_call_set_lhs (call, half_res);
2995 gimple_call_set_nothrow (call, true);
2996 new_stmt = call;
b1b6836e
RS
2997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2998 if ((j & 1) == 0)
2999 {
3000 prev_res = half_res;
3001 continue;
3002 }
3003 new_temp = make_ssa_name (vec_dest);
3004 new_stmt = gimple_build_assign (new_temp, convert_code,
3005 prev_res, half_res);
3006 }
74bf76ed
JJ
3007 else
3008 {
a844293d 3009 gcall *call;
70439f0d 3010 if (ifn != IFN_LAST)
a844293d 3011 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3012 else
a844293d 3013 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3014 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3015 gimple_call_set_lhs (call, new_temp);
3016 gimple_call_set_nothrow (call, true);
3017 new_stmt = call;
74bf76ed 3018 }
ebfd146a
IR
3019 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3020
b1b6836e 3021 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3022 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3023 else
3024 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3025
3026 prev_stmt_info = vinfo_for_stmt (new_stmt);
3027 }
b1b6836e
RS
3028 }
3029 else if (modifier == NARROW)
3030 {
ebfd146a
IR
3031 for (j = 0; j < ncopies; ++j)
3032 {
3033 /* Build argument list for the vectorized call. */
3034 if (j == 0)
9771b263 3035 vargs.create (nargs * 2);
ebfd146a 3036 else
9771b263 3037 vargs.truncate (0);
ebfd146a 3038
190c2236
JJ
3039 if (slp_node)
3040 {
ef062b13 3041 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3042 vec<tree> vec_oprnds0;
190c2236
JJ
3043
3044 for (i = 0; i < nargs; i++)
9771b263 3045 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3046 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3047 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3048
3049 /* Arguments are ready. Create the new vector stmt. */
9771b263 3050 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3051 {
3052 size_t k;
9771b263 3053 vargs.truncate (0);
190c2236
JJ
3054 for (k = 0; k < nargs; k++)
3055 {
37b5ec8f 3056 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3057 vargs.quick_push (vec_oprndsk[i]);
3058 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3059 }
a844293d 3060 gcall *call;
70439f0d 3061 if (ifn != IFN_LAST)
a844293d 3062 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3063 else
a844293d
RS
3064 call = gimple_build_call_vec (fndecl, vargs);
3065 new_temp = make_ssa_name (vec_dest, call);
3066 gimple_call_set_lhs (call, new_temp);
3067 gimple_call_set_nothrow (call, true);
3068 new_stmt = call;
190c2236 3069 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3070 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3071 }
3072
3073 for (i = 0; i < nargs; i++)
3074 {
37b5ec8f 3075 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3076 vec_oprndsi.release ();
190c2236 3077 }
190c2236
JJ
3078 continue;
3079 }
3080
ebfd146a
IR
3081 for (i = 0; i < nargs; i++)
3082 {
3083 op = gimple_call_arg (stmt, i);
3084 if (j == 0)
3085 {
3086 vec_oprnd0
81c40241 3087 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3088 vec_oprnd1
63827fb8 3089 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3090 }
3091 else
3092 {
336ecb65 3093 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3094 vec_oprnd0
63827fb8 3095 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3096 vec_oprnd1
63827fb8 3097 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3098 }
3099
9771b263
DN
3100 vargs.quick_push (vec_oprnd0);
3101 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3102 }
3103
b1b6836e 3104 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3105 new_temp = make_ssa_name (vec_dest, new_stmt);
3106 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3107 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3108
3109 if (j == 0)
3110 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3111 else
3112 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3113
3114 prev_stmt_info = vinfo_for_stmt (new_stmt);
3115 }
3116
3117 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3118 }
b1b6836e
RS
3119 else
3120 /* No current target implements this case. */
3121 return false;
ebfd146a 3122
9771b263 3123 vargs.release ();
ebfd146a 3124
ebfd146a
IR
3125 /* The call in STMT might prevent it from being removed in dce.
3126 We however cannot remove it here, due to the way the ssa name
3127 it defines is mapped to the new definition. So just replace
3128 rhs of the statement with something harmless. */
3129
dd34c087
JJ
3130 if (slp_node)
3131 return true;
3132
ebfd146a 3133 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3134 if (is_pattern_stmt_p (stmt_info))
3135 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3136 else
3137 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3138
9d5e7640 3139 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3140 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3141 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3142 STMT_VINFO_STMT (stmt_info) = new_stmt;
3143 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3144
3145 return true;
3146}
3147
3148
0136f8f0
AH
3149struct simd_call_arg_info
3150{
3151 tree vectype;
3152 tree op;
0136f8f0 3153 HOST_WIDE_INT linear_step;
34e82342 3154 enum vect_def_type dt;
0136f8f0 3155 unsigned int align;
17b658af 3156 bool simd_lane_linear;
0136f8f0
AH
3157};
3158
17b658af
JJ
3159/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3160 is linear within simd lane (but not within whole loop), note it in
3161 *ARGINFO. */
3162
3163static void
3164vect_simd_lane_linear (tree op, struct loop *loop,
3165 struct simd_call_arg_info *arginfo)
3166{
355fe088 3167 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3168
3169 if (!is_gimple_assign (def_stmt)
3170 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3171 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3172 return;
3173
3174 tree base = gimple_assign_rhs1 (def_stmt);
3175 HOST_WIDE_INT linear_step = 0;
3176 tree v = gimple_assign_rhs2 (def_stmt);
3177 while (TREE_CODE (v) == SSA_NAME)
3178 {
3179 tree t;
3180 def_stmt = SSA_NAME_DEF_STMT (v);
3181 if (is_gimple_assign (def_stmt))
3182 switch (gimple_assign_rhs_code (def_stmt))
3183 {
3184 case PLUS_EXPR:
3185 t = gimple_assign_rhs2 (def_stmt);
3186 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3187 return;
3188 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3189 v = gimple_assign_rhs1 (def_stmt);
3190 continue;
3191 case MULT_EXPR:
3192 t = gimple_assign_rhs2 (def_stmt);
3193 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3194 return;
3195 linear_step = tree_to_shwi (t);
3196 v = gimple_assign_rhs1 (def_stmt);
3197 continue;
3198 CASE_CONVERT:
3199 t = gimple_assign_rhs1 (def_stmt);
3200 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3201 || (TYPE_PRECISION (TREE_TYPE (v))
3202 < TYPE_PRECISION (TREE_TYPE (t))))
3203 return;
3204 if (!linear_step)
3205 linear_step = 1;
3206 v = t;
3207 continue;
3208 default:
3209 return;
3210 }
8e4284d0 3211 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3212 && loop->simduid
3213 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3214 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3215 == loop->simduid))
3216 {
3217 if (!linear_step)
3218 linear_step = 1;
3219 arginfo->linear_step = linear_step;
3220 arginfo->op = base;
3221 arginfo->simd_lane_linear = true;
3222 return;
3223 }
3224 }
3225}
3226
cf1b2ba4
RS
3227/* Return the number of elements in vector type VECTYPE, which is associated
3228 with a SIMD clone. At present these vectors always have a constant
3229 length. */
3230
3231static unsigned HOST_WIDE_INT
3232simd_clone_subparts (tree vectype)
3233{
928686b1 3234 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3235}
3236
0136f8f0
AH
3237/* Function vectorizable_simd_clone_call.
3238
3239 Check if STMT performs a function call that can be vectorized
3240 by calling a simd clone of the function.
3241 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3242 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3243 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3244
3245static bool
355fe088
TS
3246vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3247 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3248{
3249 tree vec_dest;
3250 tree scalar_dest;
3251 tree op, type;
3252 tree vec_oprnd0 = NULL_TREE;
3253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3254 tree vectype;
3255 unsigned int nunits;
3256 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3257 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3258 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3259 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3260 tree fndecl, new_temp;
355fe088
TS
3261 gimple *def_stmt;
3262 gimple *new_stmt = NULL;
0136f8f0 3263 int ncopies, j;
00426f9a 3264 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3265 vec<tree> vargs = vNULL;
3266 size_t i, nargs;
3267 tree lhs, rtype, ratype;
e7a74006 3268 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3269
3270 /* Is STMT a vectorizable call? */
3271 if (!is_gimple_call (stmt))
3272 return false;
3273
3274 fndecl = gimple_call_fndecl (stmt);
3275 if (fndecl == NULL_TREE)
3276 return false;
3277
d52f5295 3278 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3279 if (node == NULL || node->simd_clones == NULL)
3280 return false;
3281
3282 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3283 return false;
3284
66c16fd9
RB
3285 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3286 && ! vec_stmt)
0136f8f0
AH
3287 return false;
3288
3289 if (gimple_call_lhs (stmt)
3290 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3291 return false;
3292
3293 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3294
3295 vectype = STMT_VINFO_VECTYPE (stmt_info);
3296
3297 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3298 return false;
3299
3300 /* FORNOW */
fce57248 3301 if (slp_node)
0136f8f0
AH
3302 return false;
3303
3304 /* Process function arguments. */
3305 nargs = gimple_call_num_args (stmt);
3306
3307 /* Bail out if the function has zero arguments. */
3308 if (nargs == 0)
3309 return false;
3310
00426f9a 3311 arginfo.reserve (nargs, true);
0136f8f0
AH
3312
3313 for (i = 0; i < nargs; i++)
3314 {
3315 simd_call_arg_info thisarginfo;
3316 affine_iv iv;
3317
3318 thisarginfo.linear_step = 0;
3319 thisarginfo.align = 0;
3320 thisarginfo.op = NULL_TREE;
17b658af 3321 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3322
3323 op = gimple_call_arg (stmt, i);
81c40241
RB
3324 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3325 &thisarginfo.vectype)
0136f8f0
AH
3326 || thisarginfo.dt == vect_uninitialized_def)
3327 {
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 "use not simple.\n");
0136f8f0
AH
3331 return false;
3332 }
3333
3334 if (thisarginfo.dt == vect_constant_def
3335 || thisarginfo.dt == vect_external_def)
3336 gcc_assert (thisarginfo.vectype == NULL_TREE);
3337 else
3338 gcc_assert (thisarginfo.vectype != NULL_TREE);
3339
6c9e85fb
JJ
3340 /* For linear arguments, the analyze phase should have saved
3341 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3342 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3343 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3344 {
3345 gcc_assert (vec_stmt);
3346 thisarginfo.linear_step
17b658af 3347 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3348 thisarginfo.op
17b658af
JJ
3349 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3350 thisarginfo.simd_lane_linear
3351 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3352 == boolean_true_node);
6c9e85fb
JJ
3353 /* If loop has been peeled for alignment, we need to adjust it. */
3354 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3355 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3356 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3357 {
3358 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3359 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3360 tree opt = TREE_TYPE (thisarginfo.op);
3361 bias = fold_convert (TREE_TYPE (step), bias);
3362 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3363 thisarginfo.op
3364 = fold_build2 (POINTER_TYPE_P (opt)
3365 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3366 thisarginfo.op, bias);
3367 }
3368 }
3369 else if (!vec_stmt
3370 && thisarginfo.dt != vect_constant_def
3371 && thisarginfo.dt != vect_external_def
3372 && loop_vinfo
3373 && TREE_CODE (op) == SSA_NAME
3374 && simple_iv (loop, loop_containing_stmt (stmt), op,
3375 &iv, false)
3376 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3377 {
3378 thisarginfo.linear_step = tree_to_shwi (iv.step);
3379 thisarginfo.op = iv.base;
3380 }
3381 else if ((thisarginfo.dt == vect_constant_def
3382 || thisarginfo.dt == vect_external_def)
3383 && POINTER_TYPE_P (TREE_TYPE (op)))
3384 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3385 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3386 linear too. */
3387 if (POINTER_TYPE_P (TREE_TYPE (op))
3388 && !thisarginfo.linear_step
3389 && !vec_stmt
3390 && thisarginfo.dt != vect_constant_def
3391 && thisarginfo.dt != vect_external_def
3392 && loop_vinfo
3393 && !slp_node
3394 && TREE_CODE (op) == SSA_NAME)
3395 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3396
3397 arginfo.quick_push (thisarginfo);
3398 }
3399
d9f21f6a
RS
3400 unsigned HOST_WIDE_INT vf;
3401 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3402 {
3403 if (dump_enabled_p ())
3404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3405 "not considering SIMD clones; not yet supported"
3406 " for variable-width vectors.\n");
3407 return NULL;
3408 }
3409
0136f8f0
AH
3410 unsigned int badness = 0;
3411 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3412 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3413 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3414 else
3415 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3416 n = n->simdclone->next_clone)
3417 {
3418 unsigned int this_badness = 0;
d9f21f6a 3419 if (n->simdclone->simdlen > vf
0136f8f0
AH
3420 || n->simdclone->nargs != nargs)
3421 continue;
d9f21f6a
RS
3422 if (n->simdclone->simdlen < vf)
3423 this_badness += (exact_log2 (vf)
0136f8f0
AH
3424 - exact_log2 (n->simdclone->simdlen)) * 1024;
3425 if (n->simdclone->inbranch)
3426 this_badness += 2048;
3427 int target_badness = targetm.simd_clone.usable (n);
3428 if (target_badness < 0)
3429 continue;
3430 this_badness += target_badness * 512;
3431 /* FORNOW: Have to add code to add the mask argument. */
3432 if (n->simdclone->inbranch)
3433 continue;
3434 for (i = 0; i < nargs; i++)
3435 {
3436 switch (n->simdclone->args[i].arg_type)
3437 {
3438 case SIMD_CLONE_ARG_TYPE_VECTOR:
3439 if (!useless_type_conversion_p
3440 (n->simdclone->args[i].orig_type,
3441 TREE_TYPE (gimple_call_arg (stmt, i))))
3442 i = -1;
3443 else if (arginfo[i].dt == vect_constant_def
3444 || arginfo[i].dt == vect_external_def
3445 || arginfo[i].linear_step)
3446 this_badness += 64;
3447 break;
3448 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3449 if (arginfo[i].dt != vect_constant_def
3450 && arginfo[i].dt != vect_external_def)
3451 i = -1;
3452 break;
3453 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3454 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3455 if (arginfo[i].dt == vect_constant_def
3456 || arginfo[i].dt == vect_external_def
3457 || (arginfo[i].linear_step
3458 != n->simdclone->args[i].linear_step))
3459 i = -1;
3460 break;
3461 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3462 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3463 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3464 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3465 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3466 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3467 /* FORNOW */
3468 i = -1;
3469 break;
3470 case SIMD_CLONE_ARG_TYPE_MASK:
3471 gcc_unreachable ();
3472 }
3473 if (i == (size_t) -1)
3474 break;
3475 if (n->simdclone->args[i].alignment > arginfo[i].align)
3476 {
3477 i = -1;
3478 break;
3479 }
3480 if (arginfo[i].align)
3481 this_badness += (exact_log2 (arginfo[i].align)
3482 - exact_log2 (n->simdclone->args[i].alignment));
3483 }
3484 if (i == (size_t) -1)
3485 continue;
3486 if (bestn == NULL || this_badness < badness)
3487 {
3488 bestn = n;
3489 badness = this_badness;
3490 }
3491 }
3492
3493 if (bestn == NULL)
00426f9a 3494 return false;
0136f8f0
AH
3495
3496 for (i = 0; i < nargs; i++)
3497 if ((arginfo[i].dt == vect_constant_def
3498 || arginfo[i].dt == vect_external_def)
3499 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3500 {
3501 arginfo[i].vectype
3502 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3503 i)));
3504 if (arginfo[i].vectype == NULL
cf1b2ba4 3505 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3506 > bestn->simdclone->simdlen))
00426f9a 3507 return false;
0136f8f0
AH
3508 }
3509
3510 fndecl = bestn->decl;
3511 nunits = bestn->simdclone->simdlen;
d9f21f6a 3512 ncopies = vf / nunits;
0136f8f0
AH
3513
3514 /* If the function isn't const, only allow it in simd loops where user
3515 has asserted that at least nunits consecutive iterations can be
3516 performed using SIMD instructions. */
3517 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3518 && gimple_vuse (stmt))
00426f9a 3519 return false;
0136f8f0
AH
3520
3521 /* Sanity check: make sure that at least one copy of the vectorized stmt
3522 needs to be generated. */
3523 gcc_assert (ncopies >= 1);
3524
3525 if (!vec_stmt) /* transformation not required. */
3526 {
6c9e85fb
JJ
3527 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3528 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3529 if ((bestn->simdclone->args[i].arg_type
3530 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3531 || (bestn->simdclone->args[i].arg_type
3532 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3533 {
17b658af 3534 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3535 + 1);
3536 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3537 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3538 ? size_type_node : TREE_TYPE (arginfo[i].op);
3539 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3540 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3541 tree sll = arginfo[i].simd_lane_linear
3542 ? boolean_true_node : boolean_false_node;
3543 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3544 }
0136f8f0
AH
3545 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3546 if (dump_enabled_p ())
3547 dump_printf_loc (MSG_NOTE, vect_location,
3548 "=== vectorizable_simd_clone_call ===\n");
3549/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3550 return true;
3551 }
3552
67b8dbac 3553 /* Transform. */
0136f8f0
AH
3554
3555 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3557
3558 /* Handle def. */
3559 scalar_dest = gimple_call_lhs (stmt);
3560 vec_dest = NULL_TREE;
3561 rtype = NULL_TREE;
3562 ratype = NULL_TREE;
3563 if (scalar_dest)
3564 {
3565 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3566 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3567 if (TREE_CODE (rtype) == ARRAY_TYPE)
3568 {
3569 ratype = rtype;
3570 rtype = TREE_TYPE (ratype);
3571 }
3572 }
3573
3574 prev_stmt_info = NULL;
3575 for (j = 0; j < ncopies; ++j)
3576 {
3577 /* Build argument list for the vectorized call. */
3578 if (j == 0)
3579 vargs.create (nargs);
3580 else
3581 vargs.truncate (0);
3582
3583 for (i = 0; i < nargs; i++)
3584 {
3585 unsigned int k, l, m, o;
3586 tree atype;
3587 op = gimple_call_arg (stmt, i);
3588 switch (bestn->simdclone->args[i].arg_type)
3589 {
3590 case SIMD_CLONE_ARG_TYPE_VECTOR:
3591 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 3592 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
3593 for (m = j * o; m < (j + 1) * o; m++)
3594 {
cf1b2ba4
RS
3595 if (simd_clone_subparts (atype)
3596 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0
AH
3597 {
3598 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
3599 k = (simd_clone_subparts (arginfo[i].vectype)
3600 / simd_clone_subparts (atype));
0136f8f0
AH
3601 gcc_assert ((k & (k - 1)) == 0);
3602 if (m == 0)
3603 vec_oprnd0
81c40241 3604 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3605 else
3606 {
3607 vec_oprnd0 = arginfo[i].op;
3608 if ((m & (k - 1)) == 0)
3609 vec_oprnd0
3610 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3611 vec_oprnd0);
3612 }
3613 arginfo[i].op = vec_oprnd0;
3614 vec_oprnd0
3615 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3616 bitsize_int (prec),
0136f8f0
AH
3617 bitsize_int ((m & (k - 1)) * prec));
3618 new_stmt
b731b390 3619 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3620 vec_oprnd0);
3621 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3622 vargs.safe_push (gimple_assign_lhs (new_stmt));
3623 }
3624 else
3625 {
cf1b2ba4
RS
3626 k = (simd_clone_subparts (atype)
3627 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
3628 gcc_assert ((k & (k - 1)) == 0);
3629 vec<constructor_elt, va_gc> *ctor_elts;
3630 if (k != 1)
3631 vec_alloc (ctor_elts, k);
3632 else
3633 ctor_elts = NULL;
3634 for (l = 0; l < k; l++)
3635 {
3636 if (m == 0 && l == 0)
3637 vec_oprnd0
81c40241 3638 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3639 else
3640 vec_oprnd0
3641 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3642 arginfo[i].op);
3643 arginfo[i].op = vec_oprnd0;
3644 if (k == 1)
3645 break;
3646 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3647 vec_oprnd0);
3648 }
3649 if (k == 1)
3650 vargs.safe_push (vec_oprnd0);
3651 else
3652 {
3653 vec_oprnd0 = build_constructor (atype, ctor_elts);
3654 new_stmt
b731b390 3655 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3656 vec_oprnd0);
3657 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3658 vargs.safe_push (gimple_assign_lhs (new_stmt));
3659 }
3660 }
3661 }
3662 break;
3663 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3664 vargs.safe_push (op);
3665 break;
3666 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3667 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3668 if (j == 0)
3669 {
3670 gimple_seq stmts;
3671 arginfo[i].op
3672 = force_gimple_operand (arginfo[i].op, &stmts, true,
3673 NULL_TREE);
3674 if (stmts != NULL)
3675 {
3676 basic_block new_bb;
3677 edge pe = loop_preheader_edge (loop);
3678 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3679 gcc_assert (!new_bb);
3680 }
17b658af
JJ
3681 if (arginfo[i].simd_lane_linear)
3682 {
3683 vargs.safe_push (arginfo[i].op);
3684 break;
3685 }
b731b390 3686 tree phi_res = copy_ssa_name (op);
538dd0b7 3687 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3688 set_vinfo_for_stmt (new_phi,
310213d4 3689 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3690 add_phi_arg (new_phi, arginfo[i].op,
3691 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3692 enum tree_code code
3693 = POINTER_TYPE_P (TREE_TYPE (op))
3694 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3695 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3696 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3697 widest_int cst
3698 = wi::mul (bestn->simdclone->args[i].linear_step,
3699 ncopies * nunits);
3700 tree tcst = wide_int_to_tree (type, cst);
b731b390 3701 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3702 new_stmt
3703 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3704 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3705 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3706 set_vinfo_for_stmt (new_stmt,
310213d4 3707 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
3708 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3709 UNKNOWN_LOCATION);
3710 arginfo[i].op = phi_res;
3711 vargs.safe_push (phi_res);
3712 }
3713 else
3714 {
3715 enum tree_code code
3716 = POINTER_TYPE_P (TREE_TYPE (op))
3717 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3718 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3719 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3720 widest_int cst
3721 = wi::mul (bestn->simdclone->args[i].linear_step,
3722 j * nunits);
3723 tree tcst = wide_int_to_tree (type, cst);
b731b390 3724 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3725 new_stmt = gimple_build_assign (new_temp, code,
3726 arginfo[i].op, tcst);
0136f8f0
AH
3727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3728 vargs.safe_push (new_temp);
3729 }
3730 break;
7adb26f2
JJ
3731 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3732 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3733 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
3734 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3735 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3736 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3737 default:
3738 gcc_unreachable ();
3739 }
3740 }
3741
3742 new_stmt = gimple_build_call_vec (fndecl, vargs);
3743 if (vec_dest)
3744 {
cf1b2ba4 3745 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 3746 if (ratype)
b731b390 3747 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
3748 else if (simd_clone_subparts (vectype)
3749 == simd_clone_subparts (rtype))
0136f8f0
AH
3750 new_temp = make_ssa_name (vec_dest, new_stmt);
3751 else
3752 new_temp = make_ssa_name (rtype, new_stmt);
3753 gimple_call_set_lhs (new_stmt, new_temp);
3754 }
3755 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3756
3757 if (vec_dest)
3758 {
cf1b2ba4 3759 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
3760 {
3761 unsigned int k, l;
3762 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
cf1b2ba4 3763 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
3764 gcc_assert ((k & (k - 1)) == 0);
3765 for (l = 0; l < k; l++)
3766 {
3767 tree t;
3768 if (ratype)
3769 {
3770 t = build_fold_addr_expr (new_temp);
3771 t = build2 (MEM_REF, vectype, t,
3772 build_int_cst (TREE_TYPE (t),
3773 l * prec / BITS_PER_UNIT));
3774 }
3775 else
3776 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 3777 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 3778 new_stmt
b731b390 3779 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3780 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3781 if (j == 0 && l == 0)
3782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3783 else
3784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3785
3786 prev_stmt_info = vinfo_for_stmt (new_stmt);
3787 }
3788
3789 if (ratype)
3790 {
3791 tree clobber = build_constructor (ratype, NULL);
3792 TREE_THIS_VOLATILE (clobber) = 1;
3793 new_stmt = gimple_build_assign (new_temp, clobber);
3794 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3795 }
3796 continue;
3797 }
cf1b2ba4 3798 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 3799 {
cf1b2ba4
RS
3800 unsigned int k = (simd_clone_subparts (vectype)
3801 / simd_clone_subparts (rtype));
0136f8f0
AH
3802 gcc_assert ((k & (k - 1)) == 0);
3803 if ((j & (k - 1)) == 0)
3804 vec_alloc (ret_ctor_elts, k);
3805 if (ratype)
3806 {
cf1b2ba4 3807 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
3808 for (m = 0; m < o; m++)
3809 {
3810 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3811 size_int (m), NULL_TREE, NULL_TREE);
3812 new_stmt
b731b390 3813 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3815 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3816 gimple_assign_lhs (new_stmt));
3817 }
3818 tree clobber = build_constructor (ratype, NULL);
3819 TREE_THIS_VOLATILE (clobber) = 1;
3820 new_stmt = gimple_build_assign (new_temp, clobber);
3821 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3822 }
3823 else
3824 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3825 if ((j & (k - 1)) != k - 1)
3826 continue;
3827 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3828 new_stmt
b731b390 3829 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3830 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3831
3832 if ((unsigned) j == k - 1)
3833 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3834 else
3835 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3836
3837 prev_stmt_info = vinfo_for_stmt (new_stmt);
3838 continue;
3839 }
3840 else if (ratype)
3841 {
3842 tree t = build_fold_addr_expr (new_temp);
3843 t = build2 (MEM_REF, vectype, t,
3844 build_int_cst (TREE_TYPE (t), 0));
3845 new_stmt
b731b390 3846 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3847 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3848 tree clobber = build_constructor (ratype, NULL);
3849 TREE_THIS_VOLATILE (clobber) = 1;
3850 vect_finish_stmt_generation (stmt,
3851 gimple_build_assign (new_temp,
3852 clobber), gsi);
3853 }
3854 }
3855
3856 if (j == 0)
3857 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3858 else
3859 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3860
3861 prev_stmt_info = vinfo_for_stmt (new_stmt);
3862 }
3863
3864 vargs.release ();
3865
3866 /* The call in STMT might prevent it from being removed in dce.
3867 We however cannot remove it here, due to the way the ssa name
3868 it defines is mapped to the new definition. So just replace
3869 rhs of the statement with something harmless. */
3870
3871 if (slp_node)
3872 return true;
3873
3874 if (scalar_dest)
3875 {
3876 type = TREE_TYPE (scalar_dest);
3877 if (is_pattern_stmt_p (stmt_info))
3878 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3879 else
3880 lhs = gimple_call_lhs (stmt);
3881 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3882 }
3883 else
3884 new_stmt = gimple_build_nop ();
3885 set_vinfo_for_stmt (new_stmt, stmt_info);
3886 set_vinfo_for_stmt (stmt, NULL);
3887 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3888 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3889 unlink_stmt_vdef (stmt);
3890
3891 return true;
3892}
3893
3894
ebfd146a
IR
3895/* Function vect_gen_widened_results_half
3896
3897 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 3898 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3899 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
3900 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3901 needs to be created (DECL is a function-decl of a target-builtin).
3902 STMT is the original scalar stmt that we are vectorizing. */
3903
355fe088 3904static gimple *
ebfd146a
IR
3905vect_gen_widened_results_half (enum tree_code code,
3906 tree decl,
3907 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3908 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3909 gimple *stmt)
b8698a0f 3910{
355fe088 3911 gimple *new_stmt;
b8698a0f
L
3912 tree new_temp;
3913
3914 /* Generate half of the widened result: */
3915 if (code == CALL_EXPR)
3916 {
3917 /* Target specific support */
ebfd146a
IR
3918 if (op_type == binary_op)
3919 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3920 else
3921 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3922 new_temp = make_ssa_name (vec_dest, new_stmt);
3923 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3924 }
3925 else
ebfd146a 3926 {
b8698a0f
L
3927 /* Generic support */
3928 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3929 if (op_type != binary_op)
3930 vec_oprnd1 = NULL;
0d0e4a03 3931 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3932 new_temp = make_ssa_name (vec_dest, new_stmt);
3933 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3934 }
ebfd146a
IR
3935 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3936
ebfd146a
IR
3937 return new_stmt;
3938}
3939
4a00c761
JJ
3940
3941/* Get vectorized definitions for loop-based vectorization. For the first
3942 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3943 scalar operand), and for the rest we get a copy with
3944 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3945 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3946 The vectors are collected into VEC_OPRNDS. */
3947
3948static void
355fe088 3949vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3950 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3951{
3952 tree vec_oprnd;
3953
3954 /* Get first vector operand. */
3955 /* All the vector operands except the very first one (that is scalar oprnd)
3956 are stmt copies. */
3957 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3958 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
3959 else
3960 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3961
9771b263 3962 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3963
3964 /* Get second vector operand. */
3965 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3966 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3967
3968 *oprnd = vec_oprnd;
3969
3970 /* For conversion in multiple steps, continue to get operands
3971 recursively. */
3972 if (multi_step_cvt)
3973 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3974}
3975
3976
3977/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3978 For multi-step conversions store the resulting vectors and call the function
3979 recursively. */
3980
3981static void
9771b263 3982vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 3983 int multi_step_cvt, gimple *stmt,
9771b263 3984 vec<tree> vec_dsts,
4a00c761
JJ
3985 gimple_stmt_iterator *gsi,
3986 slp_tree slp_node, enum tree_code code,
3987 stmt_vec_info *prev_stmt_info)
3988{
3989 unsigned int i;
3990 tree vop0, vop1, new_tmp, vec_dest;
355fe088 3991 gimple *new_stmt;
4a00c761
JJ
3992 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3993
9771b263 3994 vec_dest = vec_dsts.pop ();
4a00c761 3995
9771b263 3996 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3997 {
3998 /* Create demotion operation. */
9771b263
DN
3999 vop0 = (*vec_oprnds)[i];
4000 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4001 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4002 new_tmp = make_ssa_name (vec_dest, new_stmt);
4003 gimple_assign_set_lhs (new_stmt, new_tmp);
4004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4005
4006 if (multi_step_cvt)
4007 /* Store the resulting vector for next recursive call. */
9771b263 4008 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4009 else
4010 {
4011 /* This is the last step of the conversion sequence. Store the
4012 vectors in SLP_NODE or in vector info of the scalar statement
4013 (or in STMT_VINFO_RELATED_STMT chain). */
4014 if (slp_node)
9771b263 4015 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4016 else
c689ce1e
RB
4017 {
4018 if (!*prev_stmt_info)
4019 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4020 else
4021 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4022
c689ce1e
RB
4023 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4024 }
4a00c761
JJ
4025 }
4026 }
4027
4028 /* For multi-step demotion operations we first generate demotion operations
4029 from the source type to the intermediate types, and then combine the
4030 results (stored in VEC_OPRNDS) in demotion operation to the destination
4031 type. */
4032 if (multi_step_cvt)
4033 {
4034 /* At each level of recursion we have half of the operands we had at the
4035 previous level. */
9771b263 4036 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4037 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4038 stmt, vec_dsts, gsi, slp_node,
4039 VEC_PACK_TRUNC_EXPR,
4040 prev_stmt_info);
4041 }
4042
9771b263 4043 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4044}
4045
4046
4047/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4048 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4049 the resulting vectors and call the function recursively. */
4050
4051static void
9771b263
DN
4052vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4053 vec<tree> *vec_oprnds1,
355fe088 4054 gimple *stmt, tree vec_dest,
4a00c761
JJ
4055 gimple_stmt_iterator *gsi,
4056 enum tree_code code1,
4057 enum tree_code code2, tree decl1,
4058 tree decl2, int op_type)
4059{
4060 int i;
4061 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4062 gimple *new_stmt1, *new_stmt2;
6e1aa848 4063 vec<tree> vec_tmp = vNULL;
4a00c761 4064
9771b263
DN
4065 vec_tmp.create (vec_oprnds0->length () * 2);
4066 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4067 {
4068 if (op_type == binary_op)
9771b263 4069 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4070 else
4071 vop1 = NULL_TREE;
4072
4073 /* Generate the two halves of promotion operation. */
4074 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4075 op_type, vec_dest, gsi, stmt);
4076 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4077 op_type, vec_dest, gsi, stmt);
4078 if (is_gimple_call (new_stmt1))
4079 {
4080 new_tmp1 = gimple_call_lhs (new_stmt1);
4081 new_tmp2 = gimple_call_lhs (new_stmt2);
4082 }
4083 else
4084 {
4085 new_tmp1 = gimple_assign_lhs (new_stmt1);
4086 new_tmp2 = gimple_assign_lhs (new_stmt2);
4087 }
4088
4089 /* Store the results for the next step. */
9771b263
DN
4090 vec_tmp.quick_push (new_tmp1);
4091 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4092 }
4093
689eaba3 4094 vec_oprnds0->release ();
4a00c761
JJ
4095 *vec_oprnds0 = vec_tmp;
4096}
4097
4098
b8698a0f
L
4099/* Check if STMT performs a conversion operation, that can be vectorized.
4100 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4101 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4102 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4103
4104static bool
355fe088
TS
4105vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4106 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4107{
4108 tree vec_dest;
4109 tree scalar_dest;
4a00c761 4110 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4111 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4113 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4114 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4115 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4116 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4117 tree new_temp;
355fe088 4118 gimple *def_stmt;
ebfd146a 4119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4120 int ndts = 2;
355fe088 4121 gimple *new_stmt = NULL;
ebfd146a 4122 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4123 poly_uint64 nunits_in;
4124 poly_uint64 nunits_out;
ebfd146a 4125 tree vectype_out, vectype_in;
4a00c761
JJ
4126 int ncopies, i, j;
4127 tree lhs_type, rhs_type;
ebfd146a 4128 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4129 vec<tree> vec_oprnds0 = vNULL;
4130 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4131 tree vop0;
4a00c761 4132 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4133 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4134 int multi_step_cvt = 0;
6e1aa848 4135 vec<tree> interm_types = vNULL;
4a00c761
JJ
4136 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4137 int op_type;
4a00c761 4138 unsigned short fltsz;
ebfd146a
IR
4139
4140 /* Is STMT a vectorizable conversion? */
4141
4a00c761 4142 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4143 return false;
4144
66c16fd9
RB
4145 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4146 && ! vec_stmt)
ebfd146a
IR
4147 return false;
4148
4149 if (!is_gimple_assign (stmt))
4150 return false;
4151
4152 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4153 return false;
4154
4155 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4156 if (!CONVERT_EXPR_CODE_P (code)
4157 && code != FIX_TRUNC_EXPR
4158 && code != FLOAT_EXPR
4159 && code != WIDEN_MULT_EXPR
4160 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4161 return false;
4162
4a00c761
JJ
4163 op_type = TREE_CODE_LENGTH (code);
4164
ebfd146a 4165 /* Check types of lhs and rhs. */
b690cc0f 4166 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4167 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4168 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4169
ebfd146a
IR
4170 op0 = gimple_assign_rhs1 (stmt);
4171 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4172
4173 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4174 && !((INTEGRAL_TYPE_P (lhs_type)
4175 && INTEGRAL_TYPE_P (rhs_type))
4176 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4177 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4178 return false;
4179
e6f5c25d
IE
4180 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4181 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4182 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4183 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4184 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4185 {
73fbfcad 4186 if (dump_enabled_p ())
78c60e3d 4187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4188 "type conversion to/from bit-precision unsupported."
4189 "\n");
4a00c761
JJ
4190 return false;
4191 }
4192
b690cc0f 4193 /* Check the operands of the operation. */
81c40241 4194 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4195 {
73fbfcad 4196 if (dump_enabled_p ())
78c60e3d 4197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4198 "use not simple.\n");
b690cc0f
RG
4199 return false;
4200 }
4a00c761
JJ
4201 if (op_type == binary_op)
4202 {
4203 bool ok;
4204
4205 op1 = gimple_assign_rhs2 (stmt);
4206 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4207 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4208 OP1. */
4209 if (CONSTANT_CLASS_P (op0))
81c40241 4210 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4211 else
81c40241 4212 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4213
4214 if (!ok)
4215 {
73fbfcad 4216 if (dump_enabled_p ())
78c60e3d 4217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4218 "use not simple.\n");
4a00c761
JJ
4219 return false;
4220 }
4221 }
4222
b690cc0f
RG
4223 /* If op0 is an external or constant defs use a vector type of
4224 the same size as the output vector type. */
ebfd146a 4225 if (!vectype_in)
b690cc0f 4226 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4227 if (vec_stmt)
4228 gcc_assert (vectype_in);
4229 if (!vectype_in)
4230 {
73fbfcad 4231 if (dump_enabled_p ())
4a00c761 4232 {
78c60e3d
SS
4233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4234 "no vectype for scalar type ");
4235 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4236 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4237 }
7d8930a0
IR
4238
4239 return false;
4240 }
ebfd146a 4241
e6f5c25d
IE
4242 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4243 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4244 {
4245 if (dump_enabled_p ())
4246 {
4247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4248 "can't convert between boolean and non "
4249 "boolean vectors");
4250 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4251 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4252 }
4253
4254 return false;
4255 }
4256
b690cc0f
RG
4257 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4258 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4259 if (known_eq (nunits_out, nunits_in))
ebfd146a 4260 modifier = NONE;
062d5ccc
RS
4261 else if (multiple_p (nunits_out, nunits_in))
4262 modifier = NARROW;
ebfd146a 4263 else
062d5ccc
RS
4264 {
4265 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4266 modifier = WIDEN;
4267 }
ebfd146a 4268
ff802fa1
IR
4269 /* Multiple types in SLP are handled by creating the appropriate number of
4270 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4271 case of SLP. */
fce57248 4272 if (slp_node)
ebfd146a 4273 ncopies = 1;
4a00c761 4274 else if (modifier == NARROW)
e8f142e2 4275 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4276 else
e8f142e2 4277 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4278
ebfd146a
IR
4279 /* Sanity check: make sure that at least one copy of the vectorized stmt
4280 needs to be generated. */
4281 gcc_assert (ncopies >= 1);
4282
16d22000
RS
4283 bool found_mode = false;
4284 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4285 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4286 opt_scalar_mode rhs_mode_iter;
b397965c 4287
ebfd146a 4288 /* Supportable by target? */
4a00c761 4289 switch (modifier)
ebfd146a 4290 {
4a00c761
JJ
4291 case NONE:
4292 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4293 return false;
4294 if (supportable_convert_operation (code, vectype_out, vectype_in,
4295 &decl1, &code1))
4296 break;
4297 /* FALLTHRU */
4298 unsupported:
73fbfcad 4299 if (dump_enabled_p ())
78c60e3d 4300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4301 "conversion not supported by target.\n");
ebfd146a 4302 return false;
ebfd146a 4303
4a00c761
JJ
4304 case WIDEN:
4305 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4306 &code1, &code2, &multi_step_cvt,
4307 &interm_types))
4a00c761
JJ
4308 {
4309 /* Binary widening operation can only be supported directly by the
4310 architecture. */
4311 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4312 break;
4313 }
4314
4315 if (code != FLOAT_EXPR
b397965c 4316 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4317 goto unsupported;
4318
b397965c 4319 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4320 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4321 {
16d22000 4322 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4323 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4324 break;
4325
4a00c761
JJ
4326 cvt_type
4327 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4328 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4329 if (cvt_type == NULL_TREE)
4330 goto unsupported;
4331
4332 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4333 {
4334 if (!supportable_convert_operation (code, vectype_out,
4335 cvt_type, &decl1, &codecvt1))
4336 goto unsupported;
4337 }
4338 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4339 cvt_type, &codecvt1,
4340 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4341 &interm_types))
4342 continue;
4343 else
4344 gcc_assert (multi_step_cvt == 0);
4345
4346 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4347 vectype_in, &code1, &code2,
4348 &multi_step_cvt, &interm_types))
16d22000
RS
4349 {
4350 found_mode = true;
4351 break;
4352 }
4a00c761
JJ
4353 }
4354
16d22000 4355 if (!found_mode)
4a00c761
JJ
4356 goto unsupported;
4357
4358 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4359 codecvt2 = ERROR_MARK;
4360 else
4361 {
4362 multi_step_cvt++;
9771b263 4363 interm_types.safe_push (cvt_type);
4a00c761
JJ
4364 cvt_type = NULL_TREE;
4365 }
4366 break;
4367
4368 case NARROW:
4369 gcc_assert (op_type == unary_op);
4370 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4371 &code1, &multi_step_cvt,
4372 &interm_types))
4373 break;
4374
4375 if (code != FIX_TRUNC_EXPR
b397965c 4376 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4377 goto unsupported;
4378
4a00c761
JJ
4379 cvt_type
4380 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4381 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4382 if (cvt_type == NULL_TREE)
4383 goto unsupported;
4384 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4385 &decl1, &codecvt1))
4386 goto unsupported;
4387 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4388 &code1, &multi_step_cvt,
4389 &interm_types))
4390 break;
4391 goto unsupported;
4392
4393 default:
4394 gcc_unreachable ();
ebfd146a
IR
4395 }
4396
4397 if (!vec_stmt) /* transformation not required. */
4398 {
73fbfcad 4399 if (dump_enabled_p ())
78c60e3d 4400 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4401 "=== vectorizable_conversion ===\n");
4a00c761 4402 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4403 {
4404 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4fc5ebf1 4405 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4406 }
4a00c761
JJ
4407 else if (modifier == NARROW)
4408 {
4409 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 4410 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4411 }
4412 else
4413 {
4414 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 4415 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4416 }
9771b263 4417 interm_types.release ();
ebfd146a
IR
4418 return true;
4419 }
4420
67b8dbac 4421 /* Transform. */
73fbfcad 4422 if (dump_enabled_p ())
78c60e3d 4423 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4424 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4425
4a00c761
JJ
4426 if (op_type == binary_op)
4427 {
4428 if (CONSTANT_CLASS_P (op0))
4429 op0 = fold_convert (TREE_TYPE (op1), op0);
4430 else if (CONSTANT_CLASS_P (op1))
4431 op1 = fold_convert (TREE_TYPE (op0), op1);
4432 }
4433
4434 /* In case of multi-step conversion, we first generate conversion operations
4435 to the intermediate types, and then from that types to the final one.
4436 We create vector destinations for the intermediate type (TYPES) received
4437 from supportable_*_operation, and store them in the correct order
4438 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4439 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4440 vec_dest = vect_create_destination_var (scalar_dest,
4441 (cvt_type && modifier == WIDEN)
4442 ? cvt_type : vectype_out);
9771b263 4443 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4444
4445 if (multi_step_cvt)
4446 {
9771b263
DN
4447 for (i = interm_types.length () - 1;
4448 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4449 {
4450 vec_dest = vect_create_destination_var (scalar_dest,
4451 intermediate_type);
9771b263 4452 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4453 }
4454 }
ebfd146a 4455
4a00c761 4456 if (cvt_type)
82294ec1
JJ
4457 vec_dest = vect_create_destination_var (scalar_dest,
4458 modifier == WIDEN
4459 ? vectype_out : cvt_type);
4a00c761
JJ
4460
4461 if (!slp_node)
4462 {
30862efc 4463 if (modifier == WIDEN)
4a00c761 4464 {
c3284718 4465 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4466 if (op_type == binary_op)
9771b263 4467 vec_oprnds1.create (1);
4a00c761 4468 }
30862efc 4469 else if (modifier == NARROW)
9771b263
DN
4470 vec_oprnds0.create (
4471 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4472 }
4473 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4474 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4475
4a00c761 4476 last_oprnd = op0;
ebfd146a
IR
4477 prev_stmt_info = NULL;
4478 switch (modifier)
4479 {
4480 case NONE:
4481 for (j = 0; j < ncopies; j++)
4482 {
ebfd146a 4483 if (j == 0)
306b0c92 4484 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4485 else
4486 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4487
9771b263 4488 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4489 {
4490 /* Arguments are ready, create the new vector stmt. */
4491 if (code1 == CALL_EXPR)
4492 {
4493 new_stmt = gimple_build_call (decl1, 1, vop0);
4494 new_temp = make_ssa_name (vec_dest, new_stmt);
4495 gimple_call_set_lhs (new_stmt, new_temp);
4496 }
4497 else
4498 {
4499 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4500 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4501 new_temp = make_ssa_name (vec_dest, new_stmt);
4502 gimple_assign_set_lhs (new_stmt, new_temp);
4503 }
4504
4505 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4506 if (slp_node)
9771b263 4507 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4508 else
4509 {
4510 if (!prev_stmt_info)
4511 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4512 else
4513 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4514 prev_stmt_info = vinfo_for_stmt (new_stmt);
4515 }
4a00c761 4516 }
ebfd146a
IR
4517 }
4518 break;
4519
4520 case WIDEN:
4521 /* In case the vectorization factor (VF) is bigger than the number
4522 of elements that we can fit in a vectype (nunits), we have to
4523 generate more than one vector stmt - i.e - we need to "unroll"
4524 the vector stmt by a factor VF/nunits. */
4525 for (j = 0; j < ncopies; j++)
4526 {
4a00c761 4527 /* Handle uses. */
ebfd146a 4528 if (j == 0)
4a00c761
JJ
4529 {
4530 if (slp_node)
4531 {
4532 if (code == WIDEN_LSHIFT_EXPR)
4533 {
4534 unsigned int k;
ebfd146a 4535
4a00c761
JJ
4536 vec_oprnd1 = op1;
4537 /* Store vec_oprnd1 for every vector stmt to be created
4538 for SLP_NODE. We check during the analysis that all
4539 the shift arguments are the same. */
4540 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4541 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4542
4543 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4544 slp_node);
4a00c761
JJ
4545 }
4546 else
4547 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4548 &vec_oprnds1, slp_node);
4a00c761
JJ
4549 }
4550 else
4551 {
81c40241 4552 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4553 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4554 if (op_type == binary_op)
4555 {
4556 if (code == WIDEN_LSHIFT_EXPR)
4557 vec_oprnd1 = op1;
4558 else
81c40241 4559 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4560 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4561 }
4562 }
4563 }
ebfd146a 4564 else
4a00c761
JJ
4565 {
4566 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4567 vec_oprnds0.truncate (0);
4568 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4569 if (op_type == binary_op)
4570 {
4571 if (code == WIDEN_LSHIFT_EXPR)
4572 vec_oprnd1 = op1;
4573 else
4574 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4575 vec_oprnd1);
9771b263
DN
4576 vec_oprnds1.truncate (0);
4577 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4578 }
4579 }
ebfd146a 4580
4a00c761
JJ
4581 /* Arguments are ready. Create the new vector stmts. */
4582 for (i = multi_step_cvt; i >= 0; i--)
4583 {
9771b263 4584 tree this_dest = vec_dsts[i];
4a00c761
JJ
4585 enum tree_code c1 = code1, c2 = code2;
4586 if (i == 0 && codecvt2 != ERROR_MARK)
4587 {
4588 c1 = codecvt1;
4589 c2 = codecvt2;
4590 }
4591 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4592 &vec_oprnds1,
4593 stmt, this_dest, gsi,
4594 c1, c2, decl1, decl2,
4595 op_type);
4596 }
4597
9771b263 4598 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4599 {
4600 if (cvt_type)
4601 {
4602 if (codecvt1 == CALL_EXPR)
4603 {
4604 new_stmt = gimple_build_call (decl1, 1, vop0);
4605 new_temp = make_ssa_name (vec_dest, new_stmt);
4606 gimple_call_set_lhs (new_stmt, new_temp);
4607 }
4608 else
4609 {
4610 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4611 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4612 new_stmt = gimple_build_assign (new_temp, codecvt1,
4613 vop0);
4a00c761
JJ
4614 }
4615
4616 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4617 }
4618 else
4619 new_stmt = SSA_NAME_DEF_STMT (vop0);
4620
4621 if (slp_node)
9771b263 4622 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4623 else
c689ce1e
RB
4624 {
4625 if (!prev_stmt_info)
4626 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4627 else
4628 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4629 prev_stmt_info = vinfo_for_stmt (new_stmt);
4630 }
4a00c761 4631 }
ebfd146a 4632 }
4a00c761
JJ
4633
4634 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4635 break;
4636
4637 case NARROW:
4638 /* In case the vectorization factor (VF) is bigger than the number
4639 of elements that we can fit in a vectype (nunits), we have to
4640 generate more than one vector stmt - i.e - we need to "unroll"
4641 the vector stmt by a factor VF/nunits. */
4642 for (j = 0; j < ncopies; j++)
4643 {
4644 /* Handle uses. */
4a00c761
JJ
4645 if (slp_node)
4646 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4647 slp_node);
ebfd146a
IR
4648 else
4649 {
9771b263 4650 vec_oprnds0.truncate (0);
4a00c761
JJ
4651 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4652 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4653 }
4654
4a00c761
JJ
4655 /* Arguments are ready. Create the new vector stmts. */
4656 if (cvt_type)
9771b263 4657 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4658 {
4659 if (codecvt1 == CALL_EXPR)
4660 {
4661 new_stmt = gimple_build_call (decl1, 1, vop0);
4662 new_temp = make_ssa_name (vec_dest, new_stmt);
4663 gimple_call_set_lhs (new_stmt, new_temp);
4664 }
4665 else
4666 {
4667 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4668 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4669 new_stmt = gimple_build_assign (new_temp, codecvt1,
4670 vop0);
4a00c761 4671 }
ebfd146a 4672
4a00c761 4673 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4674 vec_oprnds0[i] = new_temp;
4a00c761 4675 }
ebfd146a 4676
4a00c761
JJ
4677 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4678 stmt, vec_dsts, gsi,
4679 slp_node, code1,
4680 &prev_stmt_info);
ebfd146a
IR
4681 }
4682
4683 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4684 break;
ebfd146a
IR
4685 }
4686
9771b263
DN
4687 vec_oprnds0.release ();
4688 vec_oprnds1.release ();
9771b263 4689 interm_types.release ();
ebfd146a
IR
4690
4691 return true;
4692}
ff802fa1
IR
4693
4694
ebfd146a
IR
4695/* Function vectorizable_assignment.
4696
b8698a0f
L
4697 Check if STMT performs an assignment (copy) that can be vectorized.
4698 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4699 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4700 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4701
4702static bool
355fe088
TS
4703vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4704 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4705{
4706 tree vec_dest;
4707 tree scalar_dest;
4708 tree op;
4709 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
4710 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4711 tree new_temp;
355fe088 4712 gimple *def_stmt;
4fc5ebf1
JG
4713 enum vect_def_type dt[1] = {vect_unknown_def_type};
4714 int ndts = 1;
ebfd146a 4715 int ncopies;
f18b55bd 4716 int i, j;
6e1aa848 4717 vec<tree> vec_oprnds = vNULL;
ebfd146a 4718 tree vop;
a70d6342 4719 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4720 vec_info *vinfo = stmt_info->vinfo;
355fe088 4721 gimple *new_stmt = NULL;
f18b55bd 4722 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4723 enum tree_code code;
4724 tree vectype_in;
ebfd146a 4725
a70d6342 4726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4727 return false;
4728
66c16fd9
RB
4729 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4730 && ! vec_stmt)
ebfd146a
IR
4731 return false;
4732
4733 /* Is vectorizable assignment? */
4734 if (!is_gimple_assign (stmt))
4735 return false;
4736
4737 scalar_dest = gimple_assign_lhs (stmt);
4738 if (TREE_CODE (scalar_dest) != SSA_NAME)
4739 return false;
4740
fde9c428 4741 code = gimple_assign_rhs_code (stmt);
ebfd146a 4742 if (gimple_assign_single_p (stmt)
fde9c428
RG
4743 || code == PAREN_EXPR
4744 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4745 op = gimple_assign_rhs1 (stmt);
4746 else
4747 return false;
4748
7b7ec6c5
RG
4749 if (code == VIEW_CONVERT_EXPR)
4750 op = TREE_OPERAND (op, 0);
4751
465c8c19 4752 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 4753 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
4754
4755 /* Multiple types in SLP are handled by creating the appropriate number of
4756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4757 case of SLP. */
fce57248 4758 if (slp_node)
465c8c19
JJ
4759 ncopies = 1;
4760 else
e8f142e2 4761 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
4762
4763 gcc_assert (ncopies >= 1);
4764
81c40241 4765 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4766 {
73fbfcad 4767 if (dump_enabled_p ())
78c60e3d 4768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4769 "use not simple.\n");
ebfd146a
IR
4770 return false;
4771 }
4772
fde9c428
RG
4773 /* We can handle NOP_EXPR conversions that do not change the number
4774 of elements or the vector size. */
7b7ec6c5
RG
4775 if ((CONVERT_EXPR_CODE_P (code)
4776 || code == VIEW_CONVERT_EXPR)
fde9c428 4777 && (!vectype_in
928686b1 4778 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
fde9c428
RG
4779 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4780 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4781 return false;
4782
7b7b1813
RG
4783 /* We do not handle bit-precision changes. */
4784 if ((CONVERT_EXPR_CODE_P (code)
4785 || code == VIEW_CONVERT_EXPR)
4786 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
4787 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4788 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
4789 /* But a conversion that does not change the bit-pattern is ok. */
4790 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4791 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
4792 && TYPE_UNSIGNED (TREE_TYPE (op)))
4793 /* Conversion between boolean types of different sizes is
4794 a simple assignment in case their vectypes are same
4795 boolean vectors. */
4796 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4797 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4798 {
73fbfcad 4799 if (dump_enabled_p ())
78c60e3d
SS
4800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4801 "type conversion to/from bit-precision "
e645e942 4802 "unsupported.\n");
7b7b1813
RG
4803 return false;
4804 }
4805
ebfd146a
IR
4806 if (!vec_stmt) /* transformation not required. */
4807 {
4808 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4809 if (dump_enabled_p ())
78c60e3d 4810 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4811 "=== vectorizable_assignment ===\n");
4fc5ebf1 4812 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
4813 return true;
4814 }
4815
67b8dbac 4816 /* Transform. */
73fbfcad 4817 if (dump_enabled_p ())
e645e942 4818 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4819
4820 /* Handle def. */
4821 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4822
4823 /* Handle use. */
f18b55bd 4824 for (j = 0; j < ncopies; j++)
ebfd146a 4825 {
f18b55bd
IR
4826 /* Handle uses. */
4827 if (j == 0)
306b0c92 4828 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
4829 else
4830 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4831
4832 /* Arguments are ready. create the new vector stmt. */
9771b263 4833 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4834 {
7b7ec6c5
RG
4835 if (CONVERT_EXPR_CODE_P (code)
4836 || code == VIEW_CONVERT_EXPR)
4a73490d 4837 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4838 new_stmt = gimple_build_assign (vec_dest, vop);
4839 new_temp = make_ssa_name (vec_dest, new_stmt);
4840 gimple_assign_set_lhs (new_stmt, new_temp);
4841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4842 if (slp_node)
9771b263 4843 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4844 }
ebfd146a
IR
4845
4846 if (slp_node)
f18b55bd
IR
4847 continue;
4848
4849 if (j == 0)
4850 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4851 else
4852 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4853
4854 prev_stmt_info = vinfo_for_stmt (new_stmt);
4855 }
b8698a0f 4856
9771b263 4857 vec_oprnds.release ();
ebfd146a
IR
4858 return true;
4859}
4860
9dc3f7de 4861
1107f3ae
IR
4862/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4863 either as shift by a scalar or by a vector. */
4864
4865bool
4866vect_supportable_shift (enum tree_code code, tree scalar_type)
4867{
4868
ef4bddc2 4869 machine_mode vec_mode;
1107f3ae
IR
4870 optab optab;
4871 int icode;
4872 tree vectype;
4873
4874 vectype = get_vectype_for_scalar_type (scalar_type);
4875 if (!vectype)
4876 return false;
4877
4878 optab = optab_for_tree_code (code, vectype, optab_scalar);
4879 if (!optab
4880 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4881 {
4882 optab = optab_for_tree_code (code, vectype, optab_vector);
4883 if (!optab
4884 || (optab_handler (optab, TYPE_MODE (vectype))
4885 == CODE_FOR_nothing))
4886 return false;
4887 }
4888
4889 vec_mode = TYPE_MODE (vectype);
4890 icode = (int) optab_handler (optab, vec_mode);
4891 if (icode == CODE_FOR_nothing)
4892 return false;
4893
4894 return true;
4895}
4896
4897
9dc3f7de
IR
4898/* Function vectorizable_shift.
4899
4900 Check if STMT performs a shift operation that can be vectorized.
4901 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4902 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4903 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4904
4905static bool
355fe088
TS
4906vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4907 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
4908{
4909 tree vec_dest;
4910 tree scalar_dest;
4911 tree op0, op1 = NULL;
4912 tree vec_oprnd1 = NULL_TREE;
4913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4914 tree vectype;
4915 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4916 enum tree_code code;
ef4bddc2 4917 machine_mode vec_mode;
9dc3f7de
IR
4918 tree new_temp;
4919 optab optab;
4920 int icode;
ef4bddc2 4921 machine_mode optab_op2_mode;
355fe088 4922 gimple *def_stmt;
9dc3f7de 4923 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4924 int ndts = 2;
355fe088 4925 gimple *new_stmt = NULL;
9dc3f7de 4926 stmt_vec_info prev_stmt_info;
928686b1
RS
4927 poly_uint64 nunits_in;
4928 poly_uint64 nunits_out;
9dc3f7de 4929 tree vectype_out;
cede2577 4930 tree op1_vectype;
9dc3f7de
IR
4931 int ncopies;
4932 int j, i;
6e1aa848
DN
4933 vec<tree> vec_oprnds0 = vNULL;
4934 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4935 tree vop0, vop1;
4936 unsigned int k;
49eab32e 4937 bool scalar_shift_arg = true;
9dc3f7de 4938 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4939 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
4940
4941 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4942 return false;
4943
66c16fd9
RB
4944 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4945 && ! vec_stmt)
9dc3f7de
IR
4946 return false;
4947
4948 /* Is STMT a vectorizable binary/unary operation? */
4949 if (!is_gimple_assign (stmt))
4950 return false;
4951
4952 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4953 return false;
4954
4955 code = gimple_assign_rhs_code (stmt);
4956
4957 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4958 || code == RROTATE_EXPR))
4959 return false;
4960
4961 scalar_dest = gimple_assign_lhs (stmt);
4962 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 4963 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 4964 {
73fbfcad 4965 if (dump_enabled_p ())
78c60e3d 4966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4967 "bit-precision shifts not supported.\n");
7b7b1813
RG
4968 return false;
4969 }
9dc3f7de
IR
4970
4971 op0 = gimple_assign_rhs1 (stmt);
81c40241 4972 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4973 {
73fbfcad 4974 if (dump_enabled_p ())
78c60e3d 4975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4976 "use not simple.\n");
9dc3f7de
IR
4977 return false;
4978 }
4979 /* If op0 is an external or constant def use a vector type with
4980 the same size as the output vector type. */
4981 if (!vectype)
4982 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4983 if (vec_stmt)
4984 gcc_assert (vectype);
4985 if (!vectype)
4986 {
73fbfcad 4987 if (dump_enabled_p ())
78c60e3d 4988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4989 "no vectype for scalar type\n");
9dc3f7de
IR
4990 return false;
4991 }
4992
4993 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4994 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 4995 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
4996 return false;
4997
4998 op1 = gimple_assign_rhs2 (stmt);
81c40241 4999 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 5000 {
73fbfcad 5001 if (dump_enabled_p ())
78c60e3d 5002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5003 "use not simple.\n");
9dc3f7de
IR
5004 return false;
5005 }
5006
9dc3f7de
IR
5007 /* Multiple types in SLP are handled by creating the appropriate number of
5008 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5009 case of SLP. */
fce57248 5010 if (slp_node)
9dc3f7de
IR
5011 ncopies = 1;
5012 else
e8f142e2 5013 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5014
5015 gcc_assert (ncopies >= 1);
5016
5017 /* Determine whether the shift amount is a vector, or scalar. If the
5018 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5019
dbfa87aa
YR
5020 if ((dt[1] == vect_internal_def
5021 || dt[1] == vect_induction_def)
5022 && !slp_node)
49eab32e
JJ
5023 scalar_shift_arg = false;
5024 else if (dt[1] == vect_constant_def
5025 || dt[1] == vect_external_def
5026 || dt[1] == vect_internal_def)
5027 {
5028 /* In SLP, need to check whether the shift count is the same,
5029 in loops if it is a constant or invariant, it is always
5030 a scalar shift. */
5031 if (slp_node)
5032 {
355fe088
TS
5033 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5034 gimple *slpstmt;
49eab32e 5035
9771b263 5036 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5037 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5038 scalar_shift_arg = false;
5039 }
60d393e8
RB
5040
5041 /* If the shift amount is computed by a pattern stmt we cannot
5042 use the scalar amount directly thus give up and use a vector
5043 shift. */
5044 if (dt[1] == vect_internal_def)
5045 {
5046 gimple *def = SSA_NAME_DEF_STMT (op1);
5047 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5048 scalar_shift_arg = false;
5049 }
49eab32e
JJ
5050 }
5051 else
5052 {
73fbfcad 5053 if (dump_enabled_p ())
78c60e3d 5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5055 "operand mode requires invariant argument.\n");
49eab32e
JJ
5056 return false;
5057 }
5058
9dc3f7de 5059 /* Vector shifted by vector. */
49eab32e 5060 if (!scalar_shift_arg)
9dc3f7de
IR
5061 {
5062 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5063 if (dump_enabled_p ())
78c60e3d 5064 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5065 "vector/vector shift/rotate found.\n");
78c60e3d 5066
aa948027
JJ
5067 if (!op1_vectype)
5068 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5069 if (op1_vectype == NULL_TREE
5070 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5071 {
73fbfcad 5072 if (dump_enabled_p ())
78c60e3d
SS
5073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5074 "unusable type for last operand in"
e645e942 5075 " vector/vector shift/rotate.\n");
cede2577
JJ
5076 return false;
5077 }
9dc3f7de
IR
5078 }
5079 /* See if the machine has a vector shifted by scalar insn and if not
5080 then see if it has a vector shifted by vector insn. */
49eab32e 5081 else
9dc3f7de
IR
5082 {
5083 optab = optab_for_tree_code (code, vectype, optab_scalar);
5084 if (optab
5085 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5086 {
73fbfcad 5087 if (dump_enabled_p ())
78c60e3d 5088 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5089 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5090 }
5091 else
5092 {
5093 optab = optab_for_tree_code (code, vectype, optab_vector);
5094 if (optab
5095 && (optab_handler (optab, TYPE_MODE (vectype))
5096 != CODE_FOR_nothing))
5097 {
49eab32e
JJ
5098 scalar_shift_arg = false;
5099
73fbfcad 5100 if (dump_enabled_p ())
78c60e3d 5101 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5102 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5103
5104 /* Unlike the other binary operators, shifts/rotates have
5105 the rhs being int, instead of the same type as the lhs,
5106 so make sure the scalar is the right type if we are
aa948027 5107 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5108 if (dt[1] == vect_constant_def)
5109 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5110 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5111 TREE_TYPE (op1)))
5112 {
5113 if (slp_node
5114 && TYPE_MODE (TREE_TYPE (vectype))
5115 != TYPE_MODE (TREE_TYPE (op1)))
5116 {
73fbfcad 5117 if (dump_enabled_p ())
78c60e3d
SS
5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5119 "unusable type for last operand in"
e645e942 5120 " vector/vector shift/rotate.\n");
21c0a521 5121 return false;
aa948027
JJ
5122 }
5123 if (vec_stmt && !slp_node)
5124 {
5125 op1 = fold_convert (TREE_TYPE (vectype), op1);
5126 op1 = vect_init_vector (stmt, op1,
5127 TREE_TYPE (vectype), NULL);
5128 }
5129 }
9dc3f7de
IR
5130 }
5131 }
5132 }
9dc3f7de
IR
5133
5134 /* Supportable by target? */
5135 if (!optab)
5136 {
73fbfcad 5137 if (dump_enabled_p ())
78c60e3d 5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5139 "no optab.\n");
9dc3f7de
IR
5140 return false;
5141 }
5142 vec_mode = TYPE_MODE (vectype);
5143 icode = (int) optab_handler (optab, vec_mode);
5144 if (icode == CODE_FOR_nothing)
5145 {
73fbfcad 5146 if (dump_enabled_p ())
78c60e3d 5147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5148 "op not supported by target.\n");
9dc3f7de
IR
5149 /* Check only during analysis. */
5150 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb
RS
5151 || (!vec_stmt
5152 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5153 return false;
73fbfcad 5154 if (dump_enabled_p ())
e645e942
TJ
5155 dump_printf_loc (MSG_NOTE, vect_location,
5156 "proceeding using word mode.\n");
9dc3f7de
IR
5157 }
5158
5159 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5160 if (!vec_stmt
5161 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5162 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5163 {
73fbfcad 5164 if (dump_enabled_p ())
78c60e3d 5165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5166 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5167 return false;
5168 }
5169
5170 if (!vec_stmt) /* transformation not required. */
5171 {
5172 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5173 if (dump_enabled_p ())
e645e942
TJ
5174 dump_printf_loc (MSG_NOTE, vect_location,
5175 "=== vectorizable_shift ===\n");
4fc5ebf1 5176 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5177 return true;
5178 }
5179
67b8dbac 5180 /* Transform. */
9dc3f7de 5181
73fbfcad 5182 if (dump_enabled_p ())
78c60e3d 5183 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5184 "transform binary/unary operation.\n");
9dc3f7de
IR
5185
5186 /* Handle def. */
5187 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5188
9dc3f7de
IR
5189 prev_stmt_info = NULL;
5190 for (j = 0; j < ncopies; j++)
5191 {
5192 /* Handle uses. */
5193 if (j == 0)
5194 {
5195 if (scalar_shift_arg)
5196 {
5197 /* Vector shl and shr insn patterns can be defined with scalar
5198 operand 2 (shift operand). In this case, use constant or loop
5199 invariant op1 directly, without extending it to vector mode
5200 first. */
5201 optab_op2_mode = insn_data[icode].operand[2].mode;
5202 if (!VECTOR_MODE_P (optab_op2_mode))
5203 {
73fbfcad 5204 if (dump_enabled_p ())
78c60e3d 5205 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5206 "operand 1 using scalar mode.\n");
9dc3f7de 5207 vec_oprnd1 = op1;
8930f723 5208 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5209 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5210 if (slp_node)
5211 {
5212 /* Store vec_oprnd1 for every vector stmt to be created
5213 for SLP_NODE. We check during the analysis that all
5214 the shift arguments are the same.
5215 TODO: Allow different constants for different vector
5216 stmts generated for an SLP instance. */
5217 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5218 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5219 }
5220 }
5221 }
5222
5223 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5224 (a special case for certain kind of vector shifts); otherwise,
5225 operand 1 should be of a vector type (the usual case). */
5226 if (vec_oprnd1)
5227 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5228 slp_node);
9dc3f7de
IR
5229 else
5230 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5231 slp_node);
9dc3f7de
IR
5232 }
5233 else
5234 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5235
5236 /* Arguments are ready. Create the new vector stmt. */
9771b263 5237 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5238 {
9771b263 5239 vop1 = vec_oprnds1[i];
0d0e4a03 5240 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5241 new_temp = make_ssa_name (vec_dest, new_stmt);
5242 gimple_assign_set_lhs (new_stmt, new_temp);
5243 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5244 if (slp_node)
9771b263 5245 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5246 }
5247
5248 if (slp_node)
5249 continue;
5250
5251 if (j == 0)
5252 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5253 else
5254 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5255 prev_stmt_info = vinfo_for_stmt (new_stmt);
5256 }
5257
9771b263
DN
5258 vec_oprnds0.release ();
5259 vec_oprnds1.release ();
9dc3f7de
IR
5260
5261 return true;
5262}
5263
5264
ebfd146a
IR
5265/* Function vectorizable_operation.
5266
16949072
RG
5267 Check if STMT performs a binary, unary or ternary operation that can
5268 be vectorized.
b8698a0f 5269 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5270 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5271 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5272
5273static bool
355fe088
TS
5274vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5275 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5276{
00f07b86 5277 tree vec_dest;
ebfd146a 5278 tree scalar_dest;
16949072 5279 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5280 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5281 tree vectype;
ebfd146a 5282 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5283 enum tree_code code, orig_code;
ef4bddc2 5284 machine_mode vec_mode;
ebfd146a
IR
5285 tree new_temp;
5286 int op_type;
00f07b86 5287 optab optab;
523ba738 5288 bool target_support_p;
355fe088 5289 gimple *def_stmt;
16949072
RG
5290 enum vect_def_type dt[3]
5291 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5292 int ndts = 3;
355fe088 5293 gimple *new_stmt = NULL;
ebfd146a 5294 stmt_vec_info prev_stmt_info;
928686b1
RS
5295 poly_uint64 nunits_in;
5296 poly_uint64 nunits_out;
ebfd146a
IR
5297 tree vectype_out;
5298 int ncopies;
5299 int j, i;
6e1aa848
DN
5300 vec<tree> vec_oprnds0 = vNULL;
5301 vec<tree> vec_oprnds1 = vNULL;
5302 vec<tree> vec_oprnds2 = vNULL;
16949072 5303 tree vop0, vop1, vop2;
a70d6342 5304 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5305 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5306
a70d6342 5307 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5308 return false;
5309
66c16fd9
RB
5310 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5311 && ! vec_stmt)
ebfd146a
IR
5312 return false;
5313
5314 /* Is STMT a vectorizable binary/unary operation? */
5315 if (!is_gimple_assign (stmt))
5316 return false;
5317
5318 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5319 return false;
5320
0eb952ea 5321 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5322
1af4ebf5
MG
5323 /* For pointer addition and subtraction, we should use the normal
5324 plus and minus for the vector operation. */
ebfd146a
IR
5325 if (code == POINTER_PLUS_EXPR)
5326 code = PLUS_EXPR;
1af4ebf5
MG
5327 if (code == POINTER_DIFF_EXPR)
5328 code = MINUS_EXPR;
ebfd146a
IR
5329
5330 /* Support only unary or binary operations. */
5331 op_type = TREE_CODE_LENGTH (code);
16949072 5332 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5333 {
73fbfcad 5334 if (dump_enabled_p ())
78c60e3d 5335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5336 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5337 op_type);
ebfd146a
IR
5338 return false;
5339 }
5340
b690cc0f
RG
5341 scalar_dest = gimple_assign_lhs (stmt);
5342 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5343
7b7b1813
RG
5344 /* Most operations cannot handle bit-precision types without extra
5345 truncations. */
045c1278 5346 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5347 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5348 /* Exception are bitwise binary operations. */
5349 && code != BIT_IOR_EXPR
5350 && code != BIT_XOR_EXPR
5351 && code != BIT_AND_EXPR)
5352 {
73fbfcad 5353 if (dump_enabled_p ())
78c60e3d 5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5355 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5356 return false;
5357 }
5358
ebfd146a 5359 op0 = gimple_assign_rhs1 (stmt);
81c40241 5360 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5361 {
73fbfcad 5362 if (dump_enabled_p ())
78c60e3d 5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5364 "use not simple.\n");
ebfd146a
IR
5365 return false;
5366 }
b690cc0f
RG
5367 /* If op0 is an external or constant def use a vector type with
5368 the same size as the output vector type. */
5369 if (!vectype)
b036c6c5
IE
5370 {
5371 /* For boolean type we cannot determine vectype by
5372 invariant value (don't know whether it is a vector
5373 of booleans or vector of integers). We use output
5374 vectype because operations on boolean don't change
5375 type. */
2568d8a1 5376 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5377 {
2568d8a1 5378 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5379 {
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "not supported operation on bool value.\n");
5383 return false;
5384 }
5385 vectype = vectype_out;
5386 }
5387 else
5388 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5389 }
7d8930a0
IR
5390 if (vec_stmt)
5391 gcc_assert (vectype);
5392 if (!vectype)
5393 {
73fbfcad 5394 if (dump_enabled_p ())
7d8930a0 5395 {
78c60e3d
SS
5396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5397 "no vectype for scalar type ");
5398 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5399 TREE_TYPE (op0));
e645e942 5400 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5401 }
5402
5403 return false;
5404 }
b690cc0f
RG
5405
5406 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5407 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5408 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5409 return false;
ebfd146a 5410
16949072 5411 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5412 {
5413 op1 = gimple_assign_rhs2 (stmt);
81c40241 5414 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5415 {
73fbfcad 5416 if (dump_enabled_p ())
78c60e3d 5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5418 "use not simple.\n");
ebfd146a
IR
5419 return false;
5420 }
5421 }
16949072
RG
5422 if (op_type == ternary_op)
5423 {
5424 op2 = gimple_assign_rhs3 (stmt);
81c40241 5425 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5426 {
73fbfcad 5427 if (dump_enabled_p ())
78c60e3d 5428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5429 "use not simple.\n");
16949072
RG
5430 return false;
5431 }
5432 }
ebfd146a 5433
b690cc0f 5434 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5435 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5436 case of SLP. */
fce57248 5437 if (slp_node)
b690cc0f
RG
5438 ncopies = 1;
5439 else
e8f142e2 5440 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5441
5442 gcc_assert (ncopies >= 1);
5443
9dc3f7de 5444 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5445 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5446 || code == RROTATE_EXPR)
9dc3f7de 5447 return false;
ebfd146a 5448
ebfd146a 5449 /* Supportable by target? */
00f07b86
RH
5450
5451 vec_mode = TYPE_MODE (vectype);
5452 if (code == MULT_HIGHPART_EXPR)
523ba738 5453 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5454 else
5455 {
5456 optab = optab_for_tree_code (code, vectype, optab_default);
5457 if (!optab)
5deb57cb 5458 {
73fbfcad 5459 if (dump_enabled_p ())
78c60e3d 5460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5461 "no optab.\n");
00f07b86 5462 return false;
5deb57cb 5463 }
523ba738
RS
5464 target_support_p = (optab_handler (optab, vec_mode)
5465 != CODE_FOR_nothing);
5deb57cb
JJ
5466 }
5467
523ba738 5468 if (!target_support_p)
ebfd146a 5469 {
73fbfcad 5470 if (dump_enabled_p ())
78c60e3d 5471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5472 "op not supported by target.\n");
ebfd146a
IR
5473 /* Check only during analysis. */
5474 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb 5475 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5476 return false;
73fbfcad 5477 if (dump_enabled_p ())
e645e942
TJ
5478 dump_printf_loc (MSG_NOTE, vect_location,
5479 "proceeding using word mode.\n");
383d9c83
IR
5480 }
5481
4a00c761 5482 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5483 if (!VECTOR_MODE_P (vec_mode)
5484 && !vec_stmt
ca09abcb 5485 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5486 {
73fbfcad 5487 if (dump_enabled_p ())
78c60e3d 5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5489 "not worthwhile without SIMD support.\n");
e34842c6 5490 return false;
7d8930a0 5491 }
ebfd146a 5492
ebfd146a
IR
5493 if (!vec_stmt) /* transformation not required. */
5494 {
4a00c761 5495 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5496 if (dump_enabled_p ())
78c60e3d 5497 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5498 "=== vectorizable_operation ===\n");
4fc5ebf1 5499 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5500 return true;
5501 }
5502
67b8dbac 5503 /* Transform. */
ebfd146a 5504
73fbfcad 5505 if (dump_enabled_p ())
78c60e3d 5506 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5507 "transform binary/unary operation.\n");
383d9c83 5508
ebfd146a 5509 /* Handle def. */
00f07b86 5510 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5511
0eb952ea
JJ
5512 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5513 vectors with unsigned elements, but the result is signed. So, we
5514 need to compute the MINUS_EXPR into vectype temporary and
5515 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5516 tree vec_cvt_dest = NULL_TREE;
5517 if (orig_code == POINTER_DIFF_EXPR)
5518 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5519
ebfd146a
IR
5520 /* In case the vectorization factor (VF) is bigger than the number
5521 of elements that we can fit in a vectype (nunits), we have to generate
5522 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
5523 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5524 from one copy of the vector stmt to the next, in the field
5525 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5526 stages to find the correct vector defs to be used when vectorizing
5527 stmts that use the defs of the current stmt. The example below
5528 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5529 we need to create 4 vectorized stmts):
5530
5531 before vectorization:
5532 RELATED_STMT VEC_STMT
5533 S1: x = memref - -
5534 S2: z = x + 1 - -
5535
5536 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5537 there):
5538 RELATED_STMT VEC_STMT
5539 VS1_0: vx0 = memref0 VS1_1 -
5540 VS1_1: vx1 = memref1 VS1_2 -
5541 VS1_2: vx2 = memref2 VS1_3 -
5542 VS1_3: vx3 = memref3 - -
5543 S1: x = load - VS1_0
5544 S2: z = x + 1 - -
5545
5546 step2: vectorize stmt S2 (done here):
5547 To vectorize stmt S2 we first need to find the relevant vector
5548 def for the first operand 'x'. This is, as usual, obtained from
5549 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5550 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5551 relevant vector def 'vx0'. Having found 'vx0' we can generate
5552 the vector stmt VS2_0, and as usual, record it in the
5553 STMT_VINFO_VEC_STMT of stmt S2.
5554 When creating the second copy (VS2_1), we obtain the relevant vector
5555 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5556 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5557 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5558 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5559 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5560 chain of stmts and pointers:
5561 RELATED_STMT VEC_STMT
5562 VS1_0: vx0 = memref0 VS1_1 -
5563 VS1_1: vx1 = memref1 VS1_2 -
5564 VS1_2: vx2 = memref2 VS1_3 -
5565 VS1_3: vx3 = memref3 - -
5566 S1: x = load - VS1_0
5567 VS2_0: vz0 = vx0 + v1 VS2_1 -
5568 VS2_1: vz1 = vx1 + v1 VS2_2 -
5569 VS2_2: vz2 = vx2 + v1 VS2_3 -
5570 VS2_3: vz3 = vx3 + v1 - -
5571 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5572
5573 prev_stmt_info = NULL;
5574 for (j = 0; j < ncopies; j++)
5575 {
5576 /* Handle uses. */
5577 if (j == 0)
4a00c761
JJ
5578 {
5579 if (op_type == binary_op || op_type == ternary_op)
5580 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5581 slp_node);
4a00c761
JJ
5582 else
5583 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5584 slp_node);
4a00c761 5585 if (op_type == ternary_op)
c392943c 5586 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5587 slp_node);
4a00c761 5588 }
ebfd146a 5589 else
4a00c761
JJ
5590 {
5591 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5592 if (op_type == ternary_op)
5593 {
9771b263
DN
5594 tree vec_oprnd = vec_oprnds2.pop ();
5595 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5596 vec_oprnd));
4a00c761
JJ
5597 }
5598 }
5599
5600 /* Arguments are ready. Create the new vector stmt. */
9771b263 5601 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5602 {
4a00c761 5603 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5604 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5605 vop2 = ((op_type == ternary_op)
9771b263 5606 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5607 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5608 new_temp = make_ssa_name (vec_dest, new_stmt);
5609 gimple_assign_set_lhs (new_stmt, new_temp);
5610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5611 if (vec_cvt_dest)
5612 {
5613 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5614 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5615 new_temp);
5616 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5617 gimple_assign_set_lhs (new_stmt, new_temp);
5618 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5619 }
4a00c761 5620 if (slp_node)
9771b263 5621 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5622 }
5623
4a00c761
JJ
5624 if (slp_node)
5625 continue;
5626
5627 if (j == 0)
5628 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5629 else
5630 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5631 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5632 }
5633
9771b263
DN
5634 vec_oprnds0.release ();
5635 vec_oprnds1.release ();
5636 vec_oprnds2.release ();
ebfd146a 5637
ebfd146a
IR
5638 return true;
5639}
5640
f702e7d4 5641/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5642
5643static void
f702e7d4 5644ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5645{
5646 if (!dr->aux)
5647 return;
5648
52639a61 5649 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5650 {
52639a61 5651 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5652
f702e7d4
RS
5653 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5654
428f0c67 5655 if (decl_in_symtab_p (base_decl))
f702e7d4 5656 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
5657 else
5658 {
f702e7d4 5659 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
5660 DECL_USER_ALIGN (base_decl) = 1;
5661 }
52639a61 5662 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5663 }
5664}
5665
ebfd146a 5666
44fc7854
BE
5667/* Function get_group_alias_ptr_type.
5668
5669 Return the alias type for the group starting at FIRST_STMT. */
5670
5671static tree
5672get_group_alias_ptr_type (gimple *first_stmt)
5673{
5674 struct data_reference *first_dr, *next_dr;
5675 gimple *next_stmt;
5676
5677 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5678 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5679 while (next_stmt)
5680 {
5681 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5682 if (get_alias_set (DR_REF (first_dr))
5683 != get_alias_set (DR_REF (next_dr)))
5684 {
5685 if (dump_enabled_p ())
5686 dump_printf_loc (MSG_NOTE, vect_location,
5687 "conflicting alias set types.\n");
5688 return ptr_type_node;
5689 }
5690 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5691 }
5692 return reference_alias_ptr_type (DR_REF (first_dr));
5693}
5694
5695
ebfd146a
IR
5696/* Function vectorizable_store.
5697
b8698a0f
L
5698 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5699 can be vectorized.
5700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5701 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5703
5704static bool
355fe088 5705vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5706 slp_tree slp_node)
ebfd146a
IR
5707{
5708 tree scalar_dest;
5709 tree data_ref;
5710 tree op;
5711 tree vec_oprnd = NULL_TREE;
5712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5713 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5714 tree elem_type;
ebfd146a 5715 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5716 struct loop *loop = NULL;
ef4bddc2 5717 machine_mode vec_mode;
ebfd146a
IR
5718 tree dummy;
5719 enum dr_alignment_support alignment_support_scheme;
355fe088 5720 gimple *def_stmt;
ebfd146a
IR
5721 enum vect_def_type dt;
5722 stmt_vec_info prev_stmt_info = NULL;
5723 tree dataref_ptr = NULL_TREE;
74bf76ed 5724 tree dataref_offset = NULL_TREE;
355fe088 5725 gimple *ptr_incr = NULL;
ebfd146a
IR
5726 int ncopies;
5727 int j;
2de001ee
RS
5728 gimple *next_stmt, *first_stmt;
5729 bool grouped_store;
ebfd146a 5730 unsigned int group_size, i;
6e1aa848
DN
5731 vec<tree> oprnds = vNULL;
5732 vec<tree> result_chain = vNULL;
ebfd146a 5733 bool inv_p;
09dfa495 5734 tree offset = NULL_TREE;
6e1aa848 5735 vec<tree> vec_oprnds = vNULL;
ebfd146a 5736 bool slp = (slp_node != NULL);
ebfd146a 5737 unsigned int vec_num;
a70d6342 5738 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5739 vec_info *vinfo = stmt_info->vinfo;
272c6793 5740 tree aggr_type;
134c85ca 5741 gather_scatter_info gs_info;
3bab6342 5742 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5743 gimple *new_stmt;
d9f21f6a 5744 poly_uint64 vf;
2de001ee 5745 vec_load_store_type vls_type;
44fc7854 5746 tree ref_type;
a70d6342 5747
a70d6342 5748 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5749 return false;
5750
66c16fd9
RB
5751 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5752 && ! vec_stmt)
ebfd146a
IR
5753 return false;
5754
5755 /* Is vectorizable store? */
5756
5757 if (!is_gimple_assign (stmt))
5758 return false;
5759
5760 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5761 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5762 && is_pattern_stmt_p (stmt_info))
5763 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5764 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5765 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5766 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5767 && TREE_CODE (scalar_dest) != COMPONENT_REF
5768 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5769 && TREE_CODE (scalar_dest) != REALPART_EXPR
5770 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5771 return false;
5772
fce57248
RS
5773 /* Cannot have hybrid store SLP -- that would mean storing to the
5774 same location twice. */
5775 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5776
ebfd146a 5777 gcc_assert (gimple_assign_single_p (stmt));
465c8c19 5778
f4d09712 5779 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 5780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5781
5782 if (loop_vinfo)
b17dc4d4
RB
5783 {
5784 loop = LOOP_VINFO_LOOP (loop_vinfo);
5785 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5786 }
5787 else
5788 vf = 1;
465c8c19
JJ
5789
5790 /* Multiple types in SLP are handled by creating the appropriate number of
5791 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5792 case of SLP. */
fce57248 5793 if (slp)
465c8c19
JJ
5794 ncopies = 1;
5795 else
e8f142e2 5796 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5797
5798 gcc_assert (ncopies >= 1);
5799
5800 /* FORNOW. This restriction should be relaxed. */
5801 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5802 {
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5805 "multiple types in nested loop.\n");
5806 return false;
5807 }
5808
ebfd146a 5809 op = gimple_assign_rhs1 (stmt);
f4d09712 5810
2f391428 5811 /* In the case this is a store from a constant make sure
11a82e25 5812 native_encode_expr can handle it. */
2f391428 5813 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
11a82e25
RB
5814 return false;
5815
f4d09712 5816 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
ebfd146a 5817 {
73fbfcad 5818 if (dump_enabled_p ())
78c60e3d 5819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5820 "use not simple.\n");
ebfd146a
IR
5821 return false;
5822 }
5823
2de001ee
RS
5824 if (dt == vect_constant_def || dt == vect_external_def)
5825 vls_type = VLS_STORE_INVARIANT;
5826 else
5827 vls_type = VLS_STORE;
5828
f4d09712
KY
5829 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5830 return false;
5831
272c6793 5832 elem_type = TREE_TYPE (vectype);
ebfd146a 5833 vec_mode = TYPE_MODE (vectype);
7b7b1813 5834
ebfd146a
IR
5835 /* FORNOW. In some cases can vectorize even if data-type not supported
5836 (e.g. - array initialization with 0). */
947131ba 5837 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5838 return false;
5839
5840 if (!STMT_VINFO_DATA_REF (stmt_info))
5841 return false;
5842
2de001ee 5843 vect_memory_access_type memory_access_type;
62da9e14 5844 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
2de001ee
RS
5845 &memory_access_type, &gs_info))
5846 return false;
3bab6342 5847
ebfd146a
IR
5848 if (!vec_stmt) /* transformation not required. */
5849 {
2de001ee 5850 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 5851 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5852 /* The SLP costs are calculated during SLP analysis. */
5853 if (!PURE_SLP_STMT (stmt_info))
2de001ee 5854 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
2e8ab70c 5855 NULL, NULL, NULL);
ebfd146a
IR
5856 return true;
5857 }
2de001ee 5858 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 5859
67b8dbac 5860 /* Transform. */
ebfd146a 5861
f702e7d4 5862 ensure_base_align (dr);
c716e67f 5863
2de001ee 5864 if (memory_access_type == VMAT_GATHER_SCATTER)
3bab6342
AT
5865 {
5866 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
134c85ca 5867 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
5868 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5869 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5870 edge pe = loop_preheader_edge (loop);
5871 gimple_seq seq;
5872 basic_block new_bb;
5873 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
5874 poly_uint64 scatter_off_nunits
5875 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 5876
4d694b27 5877 if (known_eq (nunits, scatter_off_nunits))
3bab6342 5878 modifier = NONE;
4d694b27 5879 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 5880 {
3bab6342
AT
5881 modifier = WIDEN;
5882
4d694b27
RS
5883 /* Currently gathers and scatters are only supported for
5884 fixed-length vectors. */
5885 unsigned int count = scatter_off_nunits.to_constant ();
5886 vec_perm_builder sel (count, count, 1);
5887 for (i = 0; i < (unsigned int) count; ++i)
5888 sel.quick_push (i | (count / 2));
3bab6342 5889
4d694b27 5890 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
5891 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5892 indices);
3bab6342
AT
5893 gcc_assert (perm_mask != NULL_TREE);
5894 }
4d694b27 5895 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 5896 {
3bab6342
AT
5897 modifier = NARROW;
5898
4d694b27
RS
5899 /* Currently gathers and scatters are only supported for
5900 fixed-length vectors. */
5901 unsigned int count = nunits.to_constant ();
5902 vec_perm_builder sel (count, count, 1);
5903 for (i = 0; i < (unsigned int) count; ++i)
5904 sel.quick_push (i | (count / 2));
3bab6342 5905
4d694b27 5906 vec_perm_indices indices (sel, 2, count);
e3342de4 5907 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
5908 gcc_assert (perm_mask != NULL_TREE);
5909 ncopies *= 2;
5910 }
5911 else
5912 gcc_unreachable ();
5913
134c85ca 5914 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
5915 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5916 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5917 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5918 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5919 scaletype = TREE_VALUE (arglist);
5920
5921 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5922 && TREE_CODE (rettype) == VOID_TYPE);
5923
134c85ca 5924 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
5925 if (!is_gimple_min_invariant (ptr))
5926 {
5927 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5928 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5929 gcc_assert (!new_bb);
5930 }
5931
5932 /* Currently we support only unconditional scatter stores,
5933 so mask should be all ones. */
5934 mask = build_int_cst (masktype, -1);
5935 mask = vect_init_vector (stmt, mask, masktype, NULL);
5936
134c85ca 5937 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
5938
5939 prev_stmt_info = NULL;
5940 for (j = 0; j < ncopies; ++j)
5941 {
5942 if (j == 0)
5943 {
5944 src = vec_oprnd1
81c40241 5945 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
3bab6342 5946 op = vec_oprnd0
134c85ca 5947 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
5948 }
5949 else if (modifier != NONE && (j & 1))
5950 {
5951 if (modifier == WIDEN)
5952 {
5953 src = vec_oprnd1
5954 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5956 stmt, gsi);
5957 }
5958 else if (modifier == NARROW)
5959 {
5960 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5961 stmt, gsi);
5962 op = vec_oprnd0
134c85ca
RS
5963 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5964 vec_oprnd0);
3bab6342
AT
5965 }
5966 else
5967 gcc_unreachable ();
5968 }
5969 else
5970 {
5971 src = vec_oprnd1
5972 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5973 op = vec_oprnd0
134c85ca
RS
5974 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5975 vec_oprnd0);
3bab6342
AT
5976 }
5977
5978 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5979 {
928686b1
RS
5980 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
5981 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 5982 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
5983 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5984 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5986 src = var;
5987 }
5988
5989 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5990 {
928686b1
RS
5991 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
5992 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 5993 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
5994 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5995 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5996 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5997 op = var;
5998 }
5999
6000 new_stmt
134c85ca 6001 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6002
6003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6004
6005 if (prev_stmt_info == NULL)
6006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6007 else
6008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6009 prev_stmt_info = vinfo_for_stmt (new_stmt);
6010 }
6011 return true;
6012 }
6013
2de001ee 6014 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
0d0293ac 6015 if (grouped_store)
ebfd146a 6016 {
2de001ee 6017 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 6018 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6019 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 6020
e14c1050 6021 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
6022
6023 /* FORNOW */
a70d6342 6024 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6025
6026 /* We vectorize all the stmts of the interleaving group when we
6027 reach the last stmt in the group. */
e14c1050
IR
6028 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6029 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6030 && !slp)
6031 {
6032 *vec_stmt = NULL;
6033 return true;
6034 }
6035
6036 if (slp)
4b5caab7 6037 {
0d0293ac 6038 grouped_store = false;
4b5caab7
IR
6039 /* VEC_NUM is the number of vect stmts to be created for this
6040 group. */
6041 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6042 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 6043 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6044 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 6045 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 6046 }
ebfd146a 6047 else
4b5caab7
IR
6048 /* VEC_NUM is the number of vect stmts to be created for this
6049 group. */
ebfd146a 6050 vec_num = group_size;
44fc7854
BE
6051
6052 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6053 }
b8698a0f 6054 else
ebfd146a
IR
6055 {
6056 first_stmt = stmt;
6057 first_dr = dr;
6058 group_size = vec_num = 1;
44fc7854 6059 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a 6060 }
b8698a0f 6061
73fbfcad 6062 if (dump_enabled_p ())
78c60e3d 6063 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6064 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6065
2de001ee
RS
6066 if (memory_access_type == VMAT_ELEMENTWISE
6067 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6068 {
6069 gimple_stmt_iterator incr_gsi;
6070 bool insert_after;
355fe088 6071 gimple *incr;
f2e2a985
MM
6072 tree offvar;
6073 tree ivstep;
6074 tree running_off;
6075 gimple_seq stmts = NULL;
6076 tree stride_base, stride_step, alias_off;
6077 tree vec_oprnd;
f502d50e 6078 unsigned int g;
4d694b27
RS
6079 /* Checked by get_load_store_type. */
6080 unsigned int const_nunits = nunits.to_constant ();
f2e2a985
MM
6081
6082 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6083
6084 stride_base
6085 = fold_build_pointer_plus
f502d50e 6086 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 6087 size_binop (PLUS_EXPR,
f502d50e 6088 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
44fc7854 6089 convert_to_ptrofftype (DR_INIT (first_dr))));
f502d50e 6090 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
6091
6092 /* For a store with loop-invariant (but other than power-of-2)
6093 stride (i.e. not a grouped access) like so:
6094
6095 for (i = 0; i < n; i += stride)
6096 array[i] = ...;
6097
6098 we generate a new induction variable and new stores from
6099 the components of the (vectorized) rhs:
6100
6101 for (j = 0; ; j += VF*stride)
6102 vectemp = ...;
6103 tmp1 = vectemp[0];
6104 array[j] = tmp1;
6105 tmp2 = vectemp[1];
6106 array[j + stride] = tmp2;
6107 ...
6108 */
6109
4d694b27 6110 unsigned nstores = const_nunits;
b17dc4d4 6111 unsigned lnel = 1;
cee62fee 6112 tree ltype = elem_type;
04199738 6113 tree lvectype = vectype;
cee62fee
MM
6114 if (slp)
6115 {
4d694b27
RS
6116 if (group_size < const_nunits
6117 && const_nunits % group_size == 0)
b17dc4d4 6118 {
4d694b27 6119 nstores = const_nunits / group_size;
b17dc4d4
RB
6120 lnel = group_size;
6121 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6122 lvectype = vectype;
6123
6124 /* First check if vec_extract optab doesn't support extraction
6125 of vector elts directly. */
b397965c 6126 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6127 machine_mode vmode;
6128 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6129 || !VECTOR_MODE_P (vmode)
04199738
RB
6130 || (convert_optab_handler (vec_extract_optab,
6131 TYPE_MODE (vectype), vmode)
6132 == CODE_FOR_nothing))
6133 {
6134 /* Try to avoid emitting an extract of vector elements
6135 by performing the extracts using an integer type of the
6136 same size, extracting from a vector of those and then
6137 re-interpreting it as the original vector type if
6138 supported. */
6139 unsigned lsize
6140 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6141 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6142 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6143 /* If we can't construct such a vector fall back to
6144 element extracts from the original vector type and
6145 element size stores. */
4d694b27 6146 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6147 && VECTOR_MODE_P (vmode)
04199738
RB
6148 && (convert_optab_handler (vec_extract_optab,
6149 vmode, elmode)
6150 != CODE_FOR_nothing))
6151 {
4d694b27 6152 nstores = lnunits;
04199738
RB
6153 lnel = group_size;
6154 ltype = build_nonstandard_integer_type (lsize, 1);
6155 lvectype = build_vector_type (ltype, nstores);
6156 }
6157 /* Else fall back to vector extraction anyway.
6158 Fewer stores are more important than avoiding spilling
6159 of the vector we extract from. Compared to the
6160 construction case in vectorizable_load no store-forwarding
6161 issue exists here for reasonable archs. */
6162 }
b17dc4d4 6163 }
4d694b27
RS
6164 else if (group_size >= const_nunits
6165 && group_size % const_nunits == 0)
b17dc4d4
RB
6166 {
6167 nstores = 1;
4d694b27 6168 lnel = const_nunits;
b17dc4d4 6169 ltype = vectype;
04199738 6170 lvectype = vectype;
b17dc4d4 6171 }
cee62fee
MM
6172 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6173 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6174 }
6175
f2e2a985
MM
6176 ivstep = stride_step;
6177 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6178 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6179
6180 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6181
6182 create_iv (stride_base, ivstep, NULL,
6183 loop, &incr_gsi, insert_after,
6184 &offvar, NULL);
6185 incr = gsi_stmt (incr_gsi);
310213d4 6186 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
6187
6188 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6189 if (stmts)
6190 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6191
6192 prev_stmt_info = NULL;
44fc7854 6193 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6194 next_stmt = first_stmt;
6195 for (g = 0; g < group_size; g++)
f2e2a985 6196 {
f502d50e
MM
6197 running_off = offvar;
6198 if (g)
f2e2a985 6199 {
f502d50e
MM
6200 tree size = TYPE_SIZE_UNIT (ltype);
6201 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6202 size);
f502d50e 6203 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6204 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6205 running_off, pos);
f2e2a985 6206 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6207 running_off = newoff;
f502d50e 6208 }
b17dc4d4
RB
6209 unsigned int group_el = 0;
6210 unsigned HOST_WIDE_INT
6211 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6212 for (j = 0; j < ncopies; j++)
6213 {
6214 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6215 and first_stmt == stmt. */
6216 if (j == 0)
6217 {
6218 if (slp)
6219 {
6220 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6221 slp_node);
f502d50e
MM
6222 vec_oprnd = vec_oprnds[0];
6223 }
6224 else
6225 {
6226 gcc_assert (gimple_assign_single_p (next_stmt));
6227 op = gimple_assign_rhs1 (next_stmt);
81c40241 6228 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6229 }
6230 }
f2e2a985 6231 else
f502d50e
MM
6232 {
6233 if (slp)
6234 vec_oprnd = vec_oprnds[j];
6235 else
c079cbac 6236 {
81c40241 6237 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
6238 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6239 }
f502d50e 6240 }
04199738
RB
6241 /* Pun the vector to extract from if necessary. */
6242 if (lvectype != vectype)
6243 {
6244 tree tem = make_ssa_name (lvectype);
6245 gimple *pun
6246 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6247 lvectype, vec_oprnd));
6248 vect_finish_stmt_generation (stmt, pun, gsi);
6249 vec_oprnd = tem;
6250 }
f502d50e
MM
6251 for (i = 0; i < nstores; i++)
6252 {
6253 tree newref, newoff;
355fe088 6254 gimple *incr, *assign;
f502d50e
MM
6255 tree size = TYPE_SIZE (ltype);
6256 /* Extract the i'th component. */
6257 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6258 bitsize_int (i), size);
6259 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6260 size, pos);
6261
6262 elem = force_gimple_operand_gsi (gsi, elem, true,
6263 NULL_TREE, true,
6264 GSI_SAME_STMT);
6265
b17dc4d4
RB
6266 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6267 group_el * elsz);
f502d50e 6268 newref = build2 (MEM_REF, ltype,
b17dc4d4 6269 running_off, this_off);
f502d50e
MM
6270
6271 /* And store it to *running_off. */
6272 assign = gimple_build_assign (newref, elem);
6273 vect_finish_stmt_generation (stmt, assign, gsi);
6274
b17dc4d4
RB
6275 group_el += lnel;
6276 if (! slp
6277 || group_el == group_size)
6278 {
6279 newoff = copy_ssa_name (running_off, NULL);
6280 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6281 running_off, stride_step);
6282 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6283
b17dc4d4
RB
6284 running_off = newoff;
6285 group_el = 0;
6286 }
225ce44b
RB
6287 if (g == group_size - 1
6288 && !slp)
f502d50e
MM
6289 {
6290 if (j == 0 && i == 0)
225ce44b
RB
6291 STMT_VINFO_VEC_STMT (stmt_info)
6292 = *vec_stmt = assign;
f502d50e
MM
6293 else
6294 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6295 prev_stmt_info = vinfo_for_stmt (assign);
6296 }
6297 }
f2e2a985 6298 }
f502d50e 6299 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6300 if (slp)
6301 break;
f2e2a985 6302 }
778dd3b6
RB
6303
6304 vec_oprnds.release ();
f2e2a985
MM
6305 return true;
6306 }
6307
8c681247 6308 auto_vec<tree> dr_chain (group_size);
9771b263 6309 oprnds.create (group_size);
ebfd146a 6310
720f5239 6311 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6312 gcc_assert (alignment_support_scheme);
272c6793
RS
6313 /* Targets with store-lane instructions must not require explicit
6314 realignment. */
2de001ee 6315 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
6316 || alignment_support_scheme == dr_aligned
6317 || alignment_support_scheme == dr_unaligned_supported);
6318
62da9e14
RS
6319 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6320 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6321 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6322
2de001ee 6323 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
6324 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6325 else
6326 aggr_type = vectype;
ebfd146a
IR
6327
6328 /* In case the vectorization factor (VF) is bigger than the number
6329 of elements that we can fit in a vectype (nunits), we have to generate
6330 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 6331 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6332 vect_get_vec_def_for_copy_stmt. */
6333
0d0293ac 6334 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6335
6336 S1: &base + 2 = x2
6337 S2: &base = x0
6338 S3: &base + 1 = x1
6339 S4: &base + 3 = x3
6340
6341 We create vectorized stores starting from base address (the access of the
6342 first stmt in the chain (S2 in the above example), when the last store stmt
6343 of the chain (S4) is reached:
6344
6345 VS1: &base = vx2
6346 VS2: &base + vec_size*1 = vx0
6347 VS3: &base + vec_size*2 = vx1
6348 VS4: &base + vec_size*3 = vx3
6349
6350 Then permutation statements are generated:
6351
3fcc1b55
JJ
6352 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6353 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6354 ...
b8698a0f 6355
ebfd146a
IR
6356 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6357 (the order of the data-refs in the output of vect_permute_store_chain
6358 corresponds to the order of scalar stmts in the interleaving chain - see
6359 the documentation of vect_permute_store_chain()).
6360
6361 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6362 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6363 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6364 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6365 */
6366
6367 prev_stmt_info = NULL;
6368 for (j = 0; j < ncopies; j++)
6369 {
ebfd146a
IR
6370
6371 if (j == 0)
6372 {
6373 if (slp)
6374 {
6375 /* Get vectorized arguments for SLP_NODE. */
d092494c 6376 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6377 NULL, slp_node);
ebfd146a 6378
9771b263 6379 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6380 }
6381 else
6382 {
b8698a0f
L
6383 /* For interleaved stores we collect vectorized defs for all the
6384 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6385 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6386 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6387
0d0293ac 6388 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6389 OPRNDS are of size 1. */
b8698a0f 6390 next_stmt = first_stmt;
ebfd146a
IR
6391 for (i = 0; i < group_size; i++)
6392 {
b8698a0f
L
6393 /* Since gaps are not supported for interleaved stores,
6394 GROUP_SIZE is the exact number of stmts in the chain.
6395 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6396 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
6397 iteration of the loop will be executed. */
6398 gcc_assert (next_stmt
6399 && gimple_assign_single_p (next_stmt));
6400 op = gimple_assign_rhs1 (next_stmt);
6401
81c40241 6402 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6403 dr_chain.quick_push (vec_oprnd);
6404 oprnds.quick_push (vec_oprnd);
e14c1050 6405 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
6406 }
6407 }
6408
6409 /* We should have catched mismatched types earlier. */
6410 gcc_assert (useless_type_conversion_p (vectype,
6411 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6412 bool simd_lane_access_p
6413 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6414 if (simd_lane_access_p
6415 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6416 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6417 && integer_zerop (DR_OFFSET (first_dr))
6418 && integer_zerop (DR_INIT (first_dr))
6419 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6420 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6421 {
6422 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6423 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6424 inv_p = false;
74bf76ed
JJ
6425 }
6426 else
6427 dataref_ptr
6428 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6429 simd_lane_access_p ? loop : NULL,
09dfa495 6430 offset, &dummy, gsi, &ptr_incr,
74bf76ed 6431 simd_lane_access_p, &inv_p);
a70d6342 6432 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6433 }
b8698a0f 6434 else
ebfd146a 6435 {
b8698a0f
L
6436 /* For interleaved stores we created vectorized defs for all the
6437 defs stored in OPRNDS in the previous iteration (previous copy).
6438 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6439 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6440 next copy.
0d0293ac 6441 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6442 OPRNDS are of size 1. */
6443 for (i = 0; i < group_size; i++)
6444 {
9771b263 6445 op = oprnds[i];
81c40241 6446 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 6447 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
6448 dr_chain[i] = vec_oprnd;
6449 oprnds[i] = vec_oprnd;
ebfd146a 6450 }
74bf76ed
JJ
6451 if (dataref_offset)
6452 dataref_offset
6453 = int_const_binop (PLUS_EXPR, dataref_offset,
6454 TYPE_SIZE_UNIT (aggr_type));
6455 else
6456 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6457 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
6458 }
6459
2de001ee 6460 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6461 {
272c6793 6462 tree vec_array;
267d3070 6463
272c6793
RS
6464 /* Combine all the vectors into an array. */
6465 vec_array = create_vector_array (vectype, vec_num);
6466 for (i = 0; i < vec_num; i++)
c2d7ab2a 6467 {
9771b263 6468 vec_oprnd = dr_chain[i];
272c6793 6469 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6470 }
b8698a0f 6471
272c6793
RS
6472 /* Emit:
6473 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
44fc7854 6474 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
a844293d
RS
6475 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6476 vec_array);
6477 gimple_call_set_lhs (call, data_ref);
6478 gimple_call_set_nothrow (call, true);
6479 new_stmt = call;
267d3070 6480 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6481 }
6482 else
6483 {
6484 new_stmt = NULL;
0d0293ac 6485 if (grouped_store)
272c6793 6486 {
b6b9227d
JJ
6487 if (j == 0)
6488 result_chain.create (group_size);
272c6793
RS
6489 /* Permute. */
6490 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6491 &result_chain);
6492 }
c2d7ab2a 6493
272c6793
RS
6494 next_stmt = first_stmt;
6495 for (i = 0; i < vec_num; i++)
6496 {
644ffefd 6497 unsigned align, misalign;
272c6793
RS
6498
6499 if (i > 0)
6500 /* Bump the vector pointer. */
6501 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6502 stmt, NULL_TREE);
6503
6504 if (slp)
9771b263 6505 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
6506 else if (grouped_store)
6507 /* For grouped stores vectorized defs are interleaved in
272c6793 6508 vect_permute_store_chain(). */
9771b263 6509 vec_oprnd = result_chain[i];
272c6793 6510
69a2e8a1 6511 data_ref = fold_build2 (MEM_REF, vectype,
aed93b23
RB
6512 dataref_ptr,
6513 dataref_offset
6514 ? dataref_offset
44fc7854 6515 : build_int_cst (ref_type, 0));
f702e7d4 6516 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 6517 if (aligned_access_p (first_dr))
644ffefd 6518 misalign = 0;
272c6793
RS
6519 else if (DR_MISALIGNMENT (first_dr) == -1)
6520 {
25f68d90 6521 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 6522 misalign = 0;
272c6793
RS
6523 TREE_TYPE (data_ref)
6524 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 6525 align * BITS_PER_UNIT);
272c6793
RS
6526 }
6527 else
6528 {
6529 TREE_TYPE (data_ref)
6530 = build_aligned_type (TREE_TYPE (data_ref),
6531 TYPE_ALIGN (elem_type));
644ffefd 6532 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6533 }
aed93b23
RB
6534 if (dataref_offset == NULL_TREE
6535 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
6536 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6537 misalign);
c2d7ab2a 6538
62da9e14 6539 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6540 {
6541 tree perm_mask = perm_mask_for_reverse (vectype);
6542 tree perm_dest
6543 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6544 vectype);
b731b390 6545 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
6546
6547 /* Generate the permute statement. */
355fe088 6548 gimple *perm_stmt
0d0e4a03
JJ
6549 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6550 vec_oprnd, perm_mask);
09dfa495
BM
6551 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6552
6553 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6554 vec_oprnd = new_temp;
6555 }
6556
272c6793
RS
6557 /* Arguments are ready. Create the new vector stmt. */
6558 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6559 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6560
6561 if (slp)
6562 continue;
6563
e14c1050 6564 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
6565 if (!next_stmt)
6566 break;
6567 }
ebfd146a 6568 }
1da0876c
RS
6569 if (!slp)
6570 {
6571 if (j == 0)
6572 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6573 else
6574 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6575 prev_stmt_info = vinfo_for_stmt (new_stmt);
6576 }
ebfd146a
IR
6577 }
6578
9771b263
DN
6579 oprnds.release ();
6580 result_chain.release ();
6581 vec_oprnds.release ();
ebfd146a
IR
6582
6583 return true;
6584}
6585
557be5a8
AL
6586/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6587 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 6588 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 6589 vect_gen_perm_mask_checked. */
a1e53f3f 6590
3fcc1b55 6591tree
4aae3cb3 6592vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 6593{
b00cb3bf 6594 tree mask_type;
a1e53f3f 6595
0ecc2b7d
RS
6596 poly_uint64 nunits = sel.length ();
6597 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
6598
6599 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 6600 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
6601}
6602
7ac7e286 6603/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 6604 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
6605
6606tree
4aae3cb3 6607vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 6608{
7ac7e286 6609 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
6610 return vect_gen_perm_mask_any (vectype, sel);
6611}
6612
aec7ae7d
JJ
6613/* Given a vector variable X and Y, that was generated for the scalar
6614 STMT, generate instructions to permute the vector elements of X and Y
6615 using permutation mask MASK_VEC, insert them at *GSI and return the
6616 permuted vector variable. */
a1e53f3f
L
6617
6618static tree
355fe088 6619permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6620 gimple_stmt_iterator *gsi)
a1e53f3f
L
6621{
6622 tree vectype = TREE_TYPE (x);
aec7ae7d 6623 tree perm_dest, data_ref;
355fe088 6624 gimple *perm_stmt;
a1e53f3f 6625
acdcd61b 6626 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 6627 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
6628
6629 /* Generate the permute statement. */
0d0e4a03 6630 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
6631 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6632
6633 return data_ref;
6634}
6635
6b916b36
RB
6636/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6637 inserting them on the loops preheader edge. Returns true if we
6638 were successful in doing so (and thus STMT can be moved then),
6639 otherwise returns false. */
6640
6641static bool
355fe088 6642hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
6643{
6644 ssa_op_iter i;
6645 tree op;
6646 bool any = false;
6647
6648 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6649 {
355fe088 6650 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6651 if (!gimple_nop_p (def_stmt)
6652 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6653 {
6654 /* Make sure we don't need to recurse. While we could do
6655 so in simple cases when there are more complex use webs
6656 we don't have an easy way to preserve stmt order to fulfil
6657 dependencies within them. */
6658 tree op2;
6659 ssa_op_iter i2;
d1417442
JJ
6660 if (gimple_code (def_stmt) == GIMPLE_PHI)
6661 return false;
6b916b36
RB
6662 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6663 {
355fe088 6664 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
6665 if (!gimple_nop_p (def_stmt2)
6666 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6667 return false;
6668 }
6669 any = true;
6670 }
6671 }
6672
6673 if (!any)
6674 return true;
6675
6676 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6677 {
355fe088 6678 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6679 if (!gimple_nop_p (def_stmt)
6680 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6681 {
6682 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6683 gsi_remove (&gsi, false);
6684 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6685 }
6686 }
6687
6688 return true;
6689}
6690
ebfd146a
IR
6691/* vectorizable_load.
6692
b8698a0f
L
6693 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6694 can be vectorized.
6695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6696 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6697 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6698
6699static bool
355fe088 6700vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6701 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
6702{
6703 tree scalar_dest;
6704 tree vec_dest = NULL;
6705 tree data_ref = NULL;
6706 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6707 stmt_vec_info prev_stmt_info;
ebfd146a 6708 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6709 struct loop *loop = NULL;
ebfd146a 6710 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6711 bool nested_in_vect_loop = false;
c716e67f 6712 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6713 tree elem_type;
ebfd146a 6714 tree new_temp;
ef4bddc2 6715 machine_mode mode;
355fe088 6716 gimple *new_stmt = NULL;
ebfd146a
IR
6717 tree dummy;
6718 enum dr_alignment_support alignment_support_scheme;
6719 tree dataref_ptr = NULL_TREE;
74bf76ed 6720 tree dataref_offset = NULL_TREE;
355fe088 6721 gimple *ptr_incr = NULL;
ebfd146a 6722 int ncopies;
4d694b27
RS
6723 int i, j;
6724 unsigned int group_size;
6725 poly_uint64 group_gap_adj;
ebfd146a
IR
6726 tree msq = NULL_TREE, lsq;
6727 tree offset = NULL_TREE;
356bbc4c 6728 tree byte_offset = NULL_TREE;
ebfd146a 6729 tree realignment_token = NULL_TREE;
538dd0b7 6730 gphi *phi = NULL;
6e1aa848 6731 vec<tree> dr_chain = vNULL;
0d0293ac 6732 bool grouped_load = false;
355fe088 6733 gimple *first_stmt;
4f0a0218 6734 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
6735 bool inv_p;
6736 bool compute_in_loop = false;
6737 struct loop *at_loop;
6738 int vec_num;
6739 bool slp = (slp_node != NULL);
6740 bool slp_perm = false;
6741 enum tree_code code;
a70d6342 6742 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 6743 poly_uint64 vf;
272c6793 6744 tree aggr_type;
134c85ca 6745 gather_scatter_info gs_info;
310213d4 6746 vec_info *vinfo = stmt_info->vinfo;
44fc7854 6747 tree ref_type;
a70d6342 6748
465c8c19
JJ
6749 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6750 return false;
6751
66c16fd9
RB
6752 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6753 && ! vec_stmt)
465c8c19
JJ
6754 return false;
6755
6756 /* Is vectorizable load? */
6757 if (!is_gimple_assign (stmt))
6758 return false;
6759
6760 scalar_dest = gimple_assign_lhs (stmt);
6761 if (TREE_CODE (scalar_dest) != SSA_NAME)
6762 return false;
6763
6764 code = gimple_assign_rhs_code (stmt);
6765 if (code != ARRAY_REF
6766 && code != BIT_FIELD_REF
6767 && code != INDIRECT_REF
6768 && code != COMPONENT_REF
6769 && code != IMAGPART_EXPR
6770 && code != REALPART_EXPR
6771 && code != MEM_REF
6772 && TREE_CODE_CLASS (code) != tcc_declaration)
6773 return false;
6774
6775 if (!STMT_VINFO_DATA_REF (stmt_info))
6776 return false;
6777
6778 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 6779 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 6780
a70d6342
IR
6781 if (loop_vinfo)
6782 {
6783 loop = LOOP_VINFO_LOOP (loop_vinfo);
6784 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6785 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6786 }
6787 else
3533e503 6788 vf = 1;
ebfd146a
IR
6789
6790 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6791 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6792 case of SLP. */
fce57248 6793 if (slp)
ebfd146a
IR
6794 ncopies = 1;
6795 else
e8f142e2 6796 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
6797
6798 gcc_assert (ncopies >= 1);
6799
6800 /* FORNOW. This restriction should be relaxed. */
6801 if (nested_in_vect_loop && ncopies > 1)
6802 {
73fbfcad 6803 if (dump_enabled_p ())
78c60e3d 6804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6805 "multiple types in nested loop.\n");
ebfd146a
IR
6806 return false;
6807 }
6808
f2556b68
RB
6809 /* Invalidate assumptions made by dependence analysis when vectorization
6810 on the unrolled body effectively re-orders stmts. */
6811 if (ncopies > 1
6812 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6813 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6814 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6815 {
6816 if (dump_enabled_p ())
6817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6818 "cannot perform implicit CSE when unrolling "
6819 "with negative dependence distance\n");
6820 return false;
6821 }
6822
7b7b1813 6823 elem_type = TREE_TYPE (vectype);
947131ba 6824 mode = TYPE_MODE (vectype);
ebfd146a
IR
6825
6826 /* FORNOW. In some cases can vectorize even if data-type not supported
6827 (e.g. - data copies). */
947131ba 6828 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6829 {
73fbfcad 6830 if (dump_enabled_p ())
78c60e3d 6831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6832 "Aligned load, but unsupported type.\n");
ebfd146a
IR
6833 return false;
6834 }
6835
ebfd146a 6836 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6837 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6838 {
0d0293ac 6839 grouped_load = true;
ebfd146a 6840 /* FORNOW */
2de001ee
RS
6841 gcc_assert (!nested_in_vect_loop);
6842 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6843
e14c1050 6844 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 6845 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 6846
b1af7da6
RB
6847 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6848 slp_perm = true;
6849
f2556b68
RB
6850 /* Invalidate assumptions made by dependence analysis when vectorization
6851 on the unrolled body effectively re-orders stmts. */
6852 if (!PURE_SLP_STMT (stmt_info)
6853 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6854 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6855 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6856 {
6857 if (dump_enabled_p ())
6858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6859 "cannot perform implicit CSE when performing "
6860 "group loads with negative dependence distance\n");
6861 return false;
6862 }
96bb56b2
RB
6863
6864 /* Similarly when the stmt is a load that is both part of a SLP
6865 instance and a loop vectorized stmt via the same-dr mechanism
6866 we have to give up. */
6867 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6868 && (STMT_SLP_TYPE (stmt_info)
6869 != STMT_SLP_TYPE (vinfo_for_stmt
6870 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6871 {
6872 if (dump_enabled_p ())
6873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6874 "conflicting SLP types for CSEd load\n");
6875 return false;
6876 }
ebfd146a
IR
6877 }
6878
2de001ee 6879 vect_memory_access_type memory_access_type;
62da9e14 6880 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
2de001ee
RS
6881 &memory_access_type, &gs_info))
6882 return false;
a1e53f3f 6883
ebfd146a
IR
6884 if (!vec_stmt) /* transformation not required. */
6885 {
2de001ee
RS
6886 if (!slp)
6887 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 6888 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
6889 /* The SLP costs are calculated during SLP analysis. */
6890 if (!PURE_SLP_STMT (stmt_info))
2de001ee 6891 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 6892 NULL, NULL, NULL);
ebfd146a
IR
6893 return true;
6894 }
6895
2de001ee
RS
6896 if (!slp)
6897 gcc_assert (memory_access_type
6898 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6899
73fbfcad 6900 if (dump_enabled_p ())
78c60e3d 6901 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6902 "transform load. ncopies = %d\n", ncopies);
ebfd146a 6903
67b8dbac 6904 /* Transform. */
ebfd146a 6905
f702e7d4 6906 ensure_base_align (dr);
c716e67f 6907
2de001ee 6908 if (memory_access_type == VMAT_GATHER_SCATTER)
aec7ae7d
JJ
6909 {
6910 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 6911 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
aec7ae7d 6912 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6913 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
6914 edge pe = loop_preheader_edge (loop);
6915 gimple_seq seq;
6916 basic_block new_bb;
6917 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6918 poly_uint64 gather_off_nunits
6919 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
aec7ae7d 6920
4d694b27 6921 if (known_eq (nunits, gather_off_nunits))
aec7ae7d 6922 modifier = NONE;
4d694b27 6923 else if (known_eq (nunits * 2, gather_off_nunits))
aec7ae7d 6924 {
aec7ae7d
JJ
6925 modifier = WIDEN;
6926
4d694b27
RS
6927 /* Currently widening gathers are only supported for
6928 fixed-length vectors. */
6929 int count = gather_off_nunits.to_constant ();
6930 vec_perm_builder sel (count, count, 1);
6931 for (i = 0; i < count; ++i)
6932 sel.quick_push (i | (count / 2));
aec7ae7d 6933
4d694b27 6934 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6935 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6936 indices);
aec7ae7d 6937 }
4d694b27 6938 else if (known_eq (nunits, gather_off_nunits * 2))
aec7ae7d 6939 {
aec7ae7d
JJ
6940 modifier = NARROW;
6941
4d694b27
RS
6942 /* Currently narrowing gathers are only supported for
6943 fixed-length vectors. */
6944 int count = nunits.to_constant ();
6945 vec_perm_builder sel (count, count, 1);
6946 for (i = 0; i < count; ++i)
6947 sel.quick_push (i < count / 2 ? i : i + count / 2);
aec7ae7d 6948
4d694b27 6949 vec_perm_indices indices (sel, 2, count);
e3342de4 6950 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
aec7ae7d
JJ
6951 ncopies *= 2;
6952 }
6953 else
6954 gcc_unreachable ();
6955
134c85ca 6956 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
aec7ae7d
JJ
6957 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6958 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6959 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6960 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6961 scaletype = TREE_VALUE (arglist);
d3c2fee0 6962 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
6963
6964 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6965
134c85ca 6966 ptr = fold_convert (ptrtype, gs_info.base);
aec7ae7d
JJ
6967 if (!is_gimple_min_invariant (ptr))
6968 {
6969 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6970 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6971 gcc_assert (!new_bb);
6972 }
6973
6974 /* Currently we support only unconditional gather loads,
6975 so mask should be all ones. */
d3c2fee0
AI
6976 if (TREE_CODE (masktype) == INTEGER_TYPE)
6977 mask = build_int_cst (masktype, -1);
6978 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6979 {
6980 mask = build_int_cst (TREE_TYPE (masktype), -1);
6981 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6982 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6983 }
aec7ae7d
JJ
6984 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6985 {
6986 REAL_VALUE_TYPE r;
6987 long tmp[6];
6988 for (j = 0; j < 6; ++j)
6989 tmp[j] = -1;
6990 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6991 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6992 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6993 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
6994 }
6995 else
6996 gcc_unreachable ();
aec7ae7d 6997
134c85ca 6998 scale = build_int_cst (scaletype, gs_info.scale);
aec7ae7d 6999
d3c2fee0
AI
7000 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
7001 merge = build_int_cst (TREE_TYPE (rettype), 0);
7002 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
7003 {
7004 REAL_VALUE_TYPE r;
7005 long tmp[6];
7006 for (j = 0; j < 6; ++j)
7007 tmp[j] = 0;
7008 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
7009 merge = build_real (TREE_TYPE (rettype), r);
7010 }
7011 else
7012 gcc_unreachable ();
7013 merge = build_vector_from_val (rettype, merge);
7014 merge = vect_init_vector (stmt, merge, rettype, NULL);
7015
aec7ae7d
JJ
7016 prev_stmt_info = NULL;
7017 for (j = 0; j < ncopies; ++j)
7018 {
7019 if (modifier == WIDEN && (j & 1))
7020 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
7021 perm_mask, stmt, gsi);
7022 else if (j == 0)
7023 op = vec_oprnd0
134c85ca 7024 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
aec7ae7d
JJ
7025 else
7026 op = vec_oprnd0
134c85ca 7027 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
aec7ae7d
JJ
7028
7029 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7030 {
928686b1
RS
7031 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7032 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 7033 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
aec7ae7d
JJ
7034 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7035 new_stmt
0d0e4a03 7036 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
7037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7038 op = var;
7039 }
7040
7041 new_stmt
134c85ca 7042 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
7043
7044 if (!useless_type_conversion_p (vectype, rettype))
7045 {
928686b1
RS
7046 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
7047 TYPE_VECTOR_SUBPARTS (rettype)));
0e22bb5a 7048 op = vect_get_new_ssa_name (rettype, vect_simple_var);
aec7ae7d
JJ
7049 gimple_call_set_lhs (new_stmt, op);
7050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 7051 var = make_ssa_name (vec_dest);
aec7ae7d
JJ
7052 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
7053 new_stmt
0d0e4a03 7054 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
7055 }
7056 else
7057 {
7058 var = make_ssa_name (vec_dest, new_stmt);
7059 gimple_call_set_lhs (new_stmt, var);
7060 }
7061
7062 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7063
7064 if (modifier == NARROW)
7065 {
7066 if ((j & 1) == 0)
7067 {
7068 prev_res = var;
7069 continue;
7070 }
7071 var = permute_vec_elements (prev_res, var,
7072 perm_mask, stmt, gsi);
7073 new_stmt = SSA_NAME_DEF_STMT (var);
7074 }
7075
7076 if (prev_stmt_info == NULL)
7077 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7078 else
7079 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7080 prev_stmt_info = vinfo_for_stmt (new_stmt);
7081 }
7082 return true;
7083 }
2de001ee
RS
7084
7085 if (memory_access_type == VMAT_ELEMENTWISE
7086 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7087 {
7088 gimple_stmt_iterator incr_gsi;
7089 bool insert_after;
355fe088 7090 gimple *incr;
7d75abc8 7091 tree offvar;
7d75abc8
MM
7092 tree ivstep;
7093 tree running_off;
9771b263 7094 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 7095 gimple_seq stmts = NULL;
14ac6aa2 7096 tree stride_base, stride_step, alias_off;
4d694b27
RS
7097 /* Checked by get_load_store_type. */
7098 unsigned int const_nunits = nunits.to_constant ();
14ac6aa2
RB
7099
7100 gcc_assert (!nested_in_vect_loop);
7d75abc8 7101
f502d50e 7102 if (slp && grouped_load)
44fc7854
BE
7103 {
7104 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7105 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7106 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7107 ref_type = get_group_alias_ptr_type (first_stmt);
7108 }
ab313a8c 7109 else
44fc7854
BE
7110 {
7111 first_stmt = stmt;
7112 first_dr = dr;
7113 group_size = 1;
7114 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7115 }
ab313a8c 7116
14ac6aa2
RB
7117 stride_base
7118 = fold_build_pointer_plus
ab313a8c 7119 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7120 size_binop (PLUS_EXPR,
ab313a8c
RB
7121 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7122 convert_to_ptrofftype (DR_INIT (first_dr))));
7123 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7124
7125 /* For a load with loop-invariant (but other than power-of-2)
7126 stride (i.e. not a grouped access) like so:
7127
7128 for (i = 0; i < n; i += stride)
7129 ... = array[i];
7130
7131 we generate a new induction variable and new accesses to
7132 form a new vector (or vectors, depending on ncopies):
7133
7134 for (j = 0; ; j += VF*stride)
7135 tmp1 = array[j];
7136 tmp2 = array[j + stride];
7137 ...
7138 vectemp = {tmp1, tmp2, ...}
7139 */
7140
ab313a8c
RB
7141 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7142 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7143
7144 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7145
ab313a8c 7146 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7d75abc8
MM
7147 loop, &incr_gsi, insert_after,
7148 &offvar, NULL);
7149 incr = gsi_stmt (incr_gsi);
310213d4 7150 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7151
ab313a8c
RB
7152 stride_step = force_gimple_operand (unshare_expr (stride_step),
7153 &stmts, true, NULL_TREE);
7d75abc8
MM
7154 if (stmts)
7155 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7156
7157 prev_stmt_info = NULL;
7158 running_off = offvar;
44fc7854 7159 alias_off = build_int_cst (ref_type, 0);
4d694b27 7160 int nloads = const_nunits;
e09b4c37 7161 int lnel = 1;
7b5fc413 7162 tree ltype = TREE_TYPE (vectype);
ea60dd34 7163 tree lvectype = vectype;
b266b968 7164 auto_vec<tree> dr_chain;
2de001ee 7165 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7166 {
4d694b27 7167 if (group_size < const_nunits)
e09b4c37 7168 {
ff03930a
JJ
7169 /* First check if vec_init optab supports construction from
7170 vector elts directly. */
b397965c 7171 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7172 machine_mode vmode;
7173 if (mode_for_vector (elmode, group_size).exists (&vmode)
7174 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7175 && (convert_optab_handler (vec_init_optab,
7176 TYPE_MODE (vectype), vmode)
7177 != CODE_FOR_nothing))
ea60dd34 7178 {
4d694b27 7179 nloads = const_nunits / group_size;
ea60dd34 7180 lnel = group_size;
ff03930a
JJ
7181 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7182 }
7183 else
7184 {
7185 /* Otherwise avoid emitting a constructor of vector elements
7186 by performing the loads using an integer type of the same
7187 size, constructing a vector of those and then
7188 re-interpreting it as the original vector type.
7189 This avoids a huge runtime penalty due to the general
7190 inability to perform store forwarding from smaller stores
7191 to a larger load. */
7192 unsigned lsize
7193 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7194 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7195 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7196 /* If we can't construct such a vector fall back to
7197 element loads of the original vector type. */
4d694b27 7198 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7199 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7200 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7201 != CODE_FOR_nothing))
7202 {
4d694b27 7203 nloads = lnunits;
ff03930a
JJ
7204 lnel = group_size;
7205 ltype = build_nonstandard_integer_type (lsize, 1);
7206 lvectype = build_vector_type (ltype, nloads);
7207 }
ea60dd34 7208 }
e09b4c37 7209 }
2de001ee 7210 else
e09b4c37 7211 {
ea60dd34 7212 nloads = 1;
4d694b27 7213 lnel = const_nunits;
e09b4c37 7214 ltype = vectype;
e09b4c37 7215 }
2de001ee
RS
7216 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7217 }
7218 if (slp)
7219 {
66c16fd9
RB
7220 /* For SLP permutation support we need to load the whole group,
7221 not only the number of vector stmts the permutation result
7222 fits in. */
b266b968 7223 if (slp_perm)
66c16fd9 7224 {
d9f21f6a
RS
7225 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7226 variable VF. */
7227 unsigned int const_vf = vf.to_constant ();
4d694b27 7228 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7229 dr_chain.create (ncopies);
7230 }
7231 else
7232 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7233 }
4d694b27 7234 unsigned int group_el = 0;
e09b4c37
RB
7235 unsigned HOST_WIDE_INT
7236 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7237 for (j = 0; j < ncopies; j++)
7238 {
7b5fc413 7239 if (nloads > 1)
e09b4c37
RB
7240 vec_alloc (v, nloads);
7241 for (i = 0; i < nloads; i++)
7b5fc413 7242 {
e09b4c37
RB
7243 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7244 group_el * elsz);
7245 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7246 build2 (MEM_REF, ltype,
7247 running_off, this_off));
7248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7249 if (nloads > 1)
7250 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7251 gimple_assign_lhs (new_stmt));
7252
7253 group_el += lnel;
7254 if (! slp
7255 || group_el == group_size)
7b5fc413 7256 {
e09b4c37
RB
7257 tree newoff = copy_ssa_name (running_off);
7258 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7259 running_off, stride_step);
7b5fc413
RB
7260 vect_finish_stmt_generation (stmt, incr, gsi);
7261
7262 running_off = newoff;
e09b4c37 7263 group_el = 0;
7b5fc413 7264 }
7b5fc413 7265 }
e09b4c37 7266 if (nloads > 1)
7d75abc8 7267 {
ea60dd34
RB
7268 tree vec_inv = build_constructor (lvectype, v);
7269 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7270 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7271 if (lvectype != vectype)
7272 {
7273 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7274 VIEW_CONVERT_EXPR,
7275 build1 (VIEW_CONVERT_EXPR,
7276 vectype, new_temp));
7277 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7278 }
7d75abc8
MM
7279 }
7280
7b5fc413 7281 if (slp)
b266b968 7282 {
b266b968
RB
7283 if (slp_perm)
7284 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7285 else
7286 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7287 }
7d75abc8 7288 else
225ce44b
RB
7289 {
7290 if (j == 0)
7291 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7292 else
7293 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7294 prev_stmt_info = vinfo_for_stmt (new_stmt);
7295 }
7d75abc8 7296 }
b266b968 7297 if (slp_perm)
29afecdf
RB
7298 {
7299 unsigned n_perms;
7300 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7301 slp_node_instance, false, &n_perms);
7302 }
7d75abc8
MM
7303 return true;
7304 }
aec7ae7d 7305
0d0293ac 7306 if (grouped_load)
ebfd146a 7307 {
e14c1050 7308 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7309 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7310 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7311 without permutation. */
7312 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7313 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7314 /* For BB vectorization always use the first stmt to base
7315 the data ref pointer on. */
7316 if (bb_vinfo)
7317 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7318
ebfd146a 7319 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7320 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7321 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7322 ??? But we can only do so if there is exactly one
7323 as we have no way to get at the rest. Leave the CSE
7324 opportunity alone.
7325 ??? With the group load eventually participating
7326 in multiple different permutations (having multiple
7327 slp nodes which refer to the same group) the CSE
7328 is even wrong code. See PR56270. */
7329 && !slp)
ebfd146a
IR
7330 {
7331 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7332 return true;
7333 }
7334 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7335 group_gap_adj = 0;
ebfd146a
IR
7336
7337 /* VEC_NUM is the number of vect stmts to be created for this group. */
7338 if (slp)
7339 {
0d0293ac 7340 grouped_load = false;
91ff1504
RB
7341 /* For SLP permutation support we need to load the whole group,
7342 not only the number of vector stmts the permutation result
7343 fits in. */
7344 if (slp_perm)
b267968e 7345 {
d9f21f6a
RS
7346 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7347 variable VF. */
7348 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7349 unsigned int const_nunits = nunits.to_constant ();
7350 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7351 group_gap_adj = vf * group_size - nunits * vec_num;
7352 }
91ff1504 7353 else
b267968e
RB
7354 {
7355 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7356 group_gap_adj
7357 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7358 }
a70d6342 7359 }
ebfd146a 7360 else
9b999e8c 7361 vec_num = group_size;
44fc7854
BE
7362
7363 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7364 }
7365 else
7366 {
7367 first_stmt = stmt;
7368 first_dr = dr;
7369 group_size = vec_num = 1;
9b999e8c 7370 group_gap_adj = 0;
44fc7854 7371 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7372 }
7373
720f5239 7374 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7375 gcc_assert (alignment_support_scheme);
272c6793
RS
7376 /* Targets with load-lane instructions must not require explicit
7377 realignment. */
2de001ee 7378 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
7379 || alignment_support_scheme == dr_aligned
7380 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7381
7382 /* In case the vectorization factor (VF) is bigger than the number
7383 of elements that we can fit in a vectype (nunits), we have to generate
7384 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7385 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7386 from one copy of the vector stmt to the next, in the field
ff802fa1 7387 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7388 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7389 stmts that use the defs of the current stmt. The example below
7390 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7391 need to create 4 vectorized stmts):
ebfd146a
IR
7392
7393 before vectorization:
7394 RELATED_STMT VEC_STMT
7395 S1: x = memref - -
7396 S2: z = x + 1 - -
7397
7398 step 1: vectorize stmt S1:
7399 We first create the vector stmt VS1_0, and, as usual, record a
7400 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7401 Next, we create the vector stmt VS1_1, and record a pointer to
7402 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7403 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7404 stmts and pointers:
7405 RELATED_STMT VEC_STMT
7406 VS1_0: vx0 = memref0 VS1_1 -
7407 VS1_1: vx1 = memref1 VS1_2 -
7408 VS1_2: vx2 = memref2 VS1_3 -
7409 VS1_3: vx3 = memref3 - -
7410 S1: x = load - VS1_0
7411 S2: z = x + 1 - -
7412
b8698a0f
L
7413 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7414 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
7415 stmt S2. */
7416
0d0293ac 7417 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7418
7419 S1: x2 = &base + 2
7420 S2: x0 = &base
7421 S3: x1 = &base + 1
7422 S4: x3 = &base + 3
7423
b8698a0f 7424 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7425 starting from the access of the first stmt of the chain:
7426
7427 VS1: vx0 = &base
7428 VS2: vx1 = &base + vec_size*1
7429 VS3: vx3 = &base + vec_size*2
7430 VS4: vx4 = &base + vec_size*3
7431
7432 Then permutation statements are generated:
7433
e2c83630
RH
7434 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7435 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7436 ...
7437
7438 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7439 (the order of the data-refs in the output of vect_permute_load_chain
7440 corresponds to the order of scalar stmts in the interleaving chain - see
7441 the documentation of vect_permute_load_chain()).
7442 The generation of permutation stmts and recording them in
0d0293ac 7443 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7444
b8698a0f 7445 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7446 permutation stmts above are created for every copy. The result vector
7447 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7448 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
7449
7450 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7451 on a target that supports unaligned accesses (dr_unaligned_supported)
7452 we generate the following code:
7453 p = initial_addr;
7454 indx = 0;
7455 loop {
7456 p = p + indx * vectype_size;
7457 vec_dest = *(p);
7458 indx = indx + 1;
7459 }
7460
7461 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7462 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7463 then generate the following code, in which the data in each iteration is
7464 obtained by two vector loads, one from the previous iteration, and one
7465 from the current iteration:
7466 p1 = initial_addr;
7467 msq_init = *(floor(p1))
7468 p2 = initial_addr + VS - 1;
7469 realignment_token = call target_builtin;
7470 indx = 0;
7471 loop {
7472 p2 = p2 + indx * vectype_size
7473 lsq = *(floor(p2))
7474 vec_dest = realign_load (msq, lsq, realignment_token)
7475 indx = indx + 1;
7476 msq = lsq;
7477 } */
7478
7479 /* If the misalignment remains the same throughout the execution of the
7480 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7481 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
7482 This can only occur when vectorizing memory accesses in the inner-loop
7483 nested within an outer-loop that is being vectorized. */
7484
d1e4b493 7485 if (nested_in_vect_loop
832b4117 7486 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
ebfd146a
IR
7487 {
7488 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7489 compute_in_loop = true;
7490 }
7491
7492 if ((alignment_support_scheme == dr_explicit_realign_optimized
7493 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7494 && !compute_in_loop)
ebfd146a
IR
7495 {
7496 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7497 alignment_support_scheme, NULL_TREE,
7498 &at_loop);
7499 if (alignment_support_scheme == dr_explicit_realign_optimized)
7500 {
538dd0b7 7501 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7502 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7503 size_one_node);
ebfd146a
IR
7504 }
7505 }
7506 else
7507 at_loop = loop;
7508
62da9e14 7509 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7510 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7511
2de001ee 7512 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
7513 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7514 else
7515 aggr_type = vectype;
7516
ebfd146a 7517 prev_stmt_info = NULL;
4d694b27 7518 poly_uint64 group_elt = 0;
ebfd146a 7519 for (j = 0; j < ncopies; j++)
b8698a0f 7520 {
272c6793 7521 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7522 if (j == 0)
74bf76ed
JJ
7523 {
7524 bool simd_lane_access_p
7525 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7526 if (simd_lane_access_p
7527 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7528 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7529 && integer_zerop (DR_OFFSET (first_dr))
7530 && integer_zerop (DR_INIT (first_dr))
7531 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7532 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
7533 && (alignment_support_scheme == dr_aligned
7534 || alignment_support_scheme == dr_unaligned_supported))
7535 {
7536 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7537 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7538 inv_p = false;
74bf76ed 7539 }
4f0a0218
RB
7540 else if (first_stmt_for_drptr
7541 && first_stmt != first_stmt_for_drptr)
7542 {
7543 dataref_ptr
7544 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7545 at_loop, offset, &dummy, gsi,
7546 &ptr_incr, simd_lane_access_p,
7547 &inv_p, byte_offset);
7548 /* Adjust the pointer by the difference to first_stmt. */
7549 data_reference_p ptrdr
7550 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7551 tree diff = fold_convert (sizetype,
7552 size_binop (MINUS_EXPR,
7553 DR_INIT (first_dr),
7554 DR_INIT (ptrdr)));
7555 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7556 stmt, diff);
7557 }
74bf76ed
JJ
7558 else
7559 dataref_ptr
7560 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7561 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
7562 simd_lane_access_p, &inv_p,
7563 byte_offset);
74bf76ed
JJ
7564 }
7565 else if (dataref_offset)
7566 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7567 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7568 else
272c6793
RS
7569 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7570 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7571
0d0293ac 7572 if (grouped_load || slp_perm)
9771b263 7573 dr_chain.create (vec_num);
5ce1ee7f 7574
2de001ee 7575 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7576 {
272c6793
RS
7577 tree vec_array;
7578
7579 vec_array = create_vector_array (vectype, vec_num);
7580
7581 /* Emit:
7582 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
44fc7854 7583 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
a844293d
RS
7584 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7585 data_ref);
7586 gimple_call_set_lhs (call, vec_array);
7587 gimple_call_set_nothrow (call, true);
7588 new_stmt = call;
272c6793 7589 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7590
272c6793
RS
7591 /* Extract each vector into an SSA_NAME. */
7592 for (i = 0; i < vec_num; i++)
ebfd146a 7593 {
272c6793
RS
7594 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7595 vec_array, i);
9771b263 7596 dr_chain.quick_push (new_temp);
272c6793
RS
7597 }
7598
7599 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7600 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
7601 }
7602 else
7603 {
7604 for (i = 0; i < vec_num; i++)
7605 {
7606 if (i > 0)
7607 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7608 stmt, NULL_TREE);
7609
7610 /* 2. Create the vector-load in the loop. */
7611 switch (alignment_support_scheme)
7612 {
7613 case dr_aligned:
7614 case dr_unaligned_supported:
be1ac4ec 7615 {
644ffefd
MJ
7616 unsigned int align, misalign;
7617
272c6793 7618 data_ref
aed93b23
RB
7619 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7620 dataref_offset
7621 ? dataref_offset
44fc7854 7622 : build_int_cst (ref_type, 0));
f702e7d4 7623 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
7624 if (alignment_support_scheme == dr_aligned)
7625 {
7626 gcc_assert (aligned_access_p (first_dr));
644ffefd 7627 misalign = 0;
272c6793
RS
7628 }
7629 else if (DR_MISALIGNMENT (first_dr) == -1)
7630 {
25f68d90 7631 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7632 misalign = 0;
272c6793
RS
7633 TREE_TYPE (data_ref)
7634 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 7635 align * BITS_PER_UNIT);
272c6793
RS
7636 }
7637 else
7638 {
7639 TREE_TYPE (data_ref)
7640 = build_aligned_type (TREE_TYPE (data_ref),
7641 TYPE_ALIGN (elem_type));
644ffefd 7642 misalign = DR_MISALIGNMENT (first_dr);
272c6793 7643 }
aed93b23
RB
7644 if (dataref_offset == NULL_TREE
7645 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7646 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7647 align, misalign);
272c6793 7648 break;
be1ac4ec 7649 }
272c6793 7650 case dr_explicit_realign:
267d3070 7651 {
272c6793 7652 tree ptr, bump;
272c6793 7653
d88981fc 7654 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
7655
7656 if (compute_in_loop)
7657 msq = vect_setup_realignment (first_stmt, gsi,
7658 &realignment_token,
7659 dr_explicit_realign,
7660 dataref_ptr, NULL);
7661
aed93b23
RB
7662 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7663 ptr = copy_ssa_name (dataref_ptr);
7664 else
7665 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 7666 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
7667 new_stmt = gimple_build_assign
7668 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
7669 build_int_cst
7670 (TREE_TYPE (dataref_ptr),
f702e7d4 7671 -(HOST_WIDE_INT) align));
272c6793
RS
7672 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7673 data_ref
7674 = build2 (MEM_REF, vectype, ptr,
44fc7854 7675 build_int_cst (ref_type, 0));
272c6793
RS
7676 vec_dest = vect_create_destination_var (scalar_dest,
7677 vectype);
7678 new_stmt = gimple_build_assign (vec_dest, data_ref);
7679 new_temp = make_ssa_name (vec_dest, new_stmt);
7680 gimple_assign_set_lhs (new_stmt, new_temp);
7681 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7682 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7683 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7684 msq = new_temp;
7685
d88981fc 7686 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7687 TYPE_SIZE_UNIT (elem_type));
d88981fc 7688 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7689 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
7690 new_stmt = gimple_build_assign
7691 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 7692 build_int_cst
f702e7d4 7693 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 7694 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
7695 gimple_assign_set_lhs (new_stmt, ptr);
7696 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7697 data_ref
7698 = build2 (MEM_REF, vectype, ptr,
44fc7854 7699 build_int_cst (ref_type, 0));
272c6793 7700 break;
267d3070 7701 }
272c6793 7702 case dr_explicit_realign_optimized:
f702e7d4
RS
7703 {
7704 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7705 new_temp = copy_ssa_name (dataref_ptr);
7706 else
7707 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7708 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7709 new_stmt = gimple_build_assign
7710 (new_temp, BIT_AND_EXPR, dataref_ptr,
7711 build_int_cst (TREE_TYPE (dataref_ptr),
7712 -(HOST_WIDE_INT) align));
7713 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7714 data_ref
7715 = build2 (MEM_REF, vectype, new_temp,
7716 build_int_cst (ref_type, 0));
7717 break;
7718 }
272c6793
RS
7719 default:
7720 gcc_unreachable ();
7721 }
ebfd146a 7722 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 7723 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
7724 new_temp = make_ssa_name (vec_dest, new_stmt);
7725 gimple_assign_set_lhs (new_stmt, new_temp);
7726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7727
272c6793
RS
7728 /* 3. Handle explicit realignment if necessary/supported.
7729 Create in loop:
7730 vec_dest = realign_load (msq, lsq, realignment_token) */
7731 if (alignment_support_scheme == dr_explicit_realign_optimized
7732 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7733 {
272c6793
RS
7734 lsq = gimple_assign_lhs (new_stmt);
7735 if (!realignment_token)
7736 realignment_token = dataref_ptr;
7737 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
7738 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7739 msq, lsq, realignment_token);
272c6793
RS
7740 new_temp = make_ssa_name (vec_dest, new_stmt);
7741 gimple_assign_set_lhs (new_stmt, new_temp);
7742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7743
7744 if (alignment_support_scheme == dr_explicit_realign_optimized)
7745 {
7746 gcc_assert (phi);
7747 if (i == vec_num - 1 && j == ncopies - 1)
7748 add_phi_arg (phi, lsq,
7749 loop_latch_edge (containing_loop),
9e227d60 7750 UNKNOWN_LOCATION);
272c6793
RS
7751 msq = lsq;
7752 }
ebfd146a 7753 }
ebfd146a 7754
59fd17e3
RB
7755 /* 4. Handle invariant-load. */
7756 if (inv_p && !bb_vinfo)
7757 {
59fd17e3 7758 gcc_assert (!grouped_load);
d1417442
JJ
7759 /* If we have versioned for aliasing or the loop doesn't
7760 have any data dependencies that would preclude this,
7761 then we are sure this is a loop invariant load and
7762 thus we can insert it on the preheader edge. */
7763 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7764 && !nested_in_vect_loop
6b916b36 7765 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
7766 {
7767 if (dump_enabled_p ())
7768 {
7769 dump_printf_loc (MSG_NOTE, vect_location,
7770 "hoisting out of the vectorized "
7771 "loop: ");
7772 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7773 }
b731b390 7774 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
7775 gsi_insert_on_edge_immediate
7776 (loop_preheader_edge (loop),
7777 gimple_build_assign (tem,
7778 unshare_expr
7779 (gimple_assign_rhs1 (stmt))));
7780 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
7781 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7782 set_vinfo_for_stmt (new_stmt,
7783 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
7784 }
7785 else
7786 {
7787 gimple_stmt_iterator gsi2 = *gsi;
7788 gsi_next (&gsi2);
7789 new_temp = vect_init_vector (stmt, scalar_dest,
7790 vectype, &gsi2);
34cd48e5 7791 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7792 }
59fd17e3
RB
7793 }
7794
62da9e14 7795 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 7796 {
aec7ae7d
JJ
7797 tree perm_mask = perm_mask_for_reverse (vectype);
7798 new_temp = permute_vec_elements (new_temp, new_temp,
7799 perm_mask, stmt, gsi);
ebfd146a
IR
7800 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7801 }
267d3070 7802
272c6793 7803 /* Collect vector loads and later create their permutation in
0d0293ac
MM
7804 vect_transform_grouped_load (). */
7805 if (grouped_load || slp_perm)
9771b263 7806 dr_chain.quick_push (new_temp);
267d3070 7807
272c6793
RS
7808 /* Store vector loads in the corresponding SLP_NODE. */
7809 if (slp && !slp_perm)
9771b263 7810 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
7811
7812 /* With SLP permutation we load the gaps as well, without
7813 we need to skip the gaps after we manage to fully load
7814 all elements. group_gap_adj is GROUP_SIZE here. */
7815 group_elt += nunits;
d9f21f6a
RS
7816 if (maybe_ne (group_gap_adj, 0U)
7817 && !slp_perm
7818 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 7819 {
d9f21f6a
RS
7820 poly_wide_int bump_val
7821 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7822 * group_gap_adj);
8e6cdc90 7823 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
7824 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7825 stmt, bump);
7826 group_elt = 0;
7827 }
272c6793 7828 }
9b999e8c
RB
7829 /* Bump the vector pointer to account for a gap or for excess
7830 elements loaded for a permuted SLP load. */
d9f21f6a 7831 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 7832 {
d9f21f6a
RS
7833 poly_wide_int bump_val
7834 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7835 * group_gap_adj);
8e6cdc90 7836 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
7837 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7838 stmt, bump);
7839 }
ebfd146a
IR
7840 }
7841
7842 if (slp && !slp_perm)
7843 continue;
7844
7845 if (slp_perm)
7846 {
29afecdf 7847 unsigned n_perms;
01d8bf07 7848 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
7849 slp_node_instance, false,
7850 &n_perms))
ebfd146a 7851 {
9771b263 7852 dr_chain.release ();
ebfd146a
IR
7853 return false;
7854 }
7855 }
7856 else
7857 {
0d0293ac 7858 if (grouped_load)
ebfd146a 7859 {
2de001ee 7860 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 7861 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7862 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
7863 }
7864 else
7865 {
7866 if (j == 0)
7867 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7868 else
7869 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7870 prev_stmt_info = vinfo_for_stmt (new_stmt);
7871 }
7872 }
9771b263 7873 dr_chain.release ();
ebfd146a
IR
7874 }
7875
ebfd146a
IR
7876 return true;
7877}
7878
7879/* Function vect_is_simple_cond.
b8698a0f 7880
ebfd146a
IR
7881 Input:
7882 LOOP - the loop that is being vectorized.
7883 COND - Condition that is checked for simple use.
7884
e9e1d143
RG
7885 Output:
7886 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 7887 *DTS - The def types for the arguments of the comparison
e9e1d143 7888
ebfd146a
IR
7889 Returns whether a COND can be vectorized. Checks whether
7890 condition operands are supportable using vec_is_simple_use. */
7891
87aab9b2 7892static bool
4fc5ebf1 7893vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
7894 tree *comp_vectype, enum vect_def_type *dts,
7895 tree vectype)
ebfd146a
IR
7896{
7897 tree lhs, rhs;
e9e1d143 7898 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7899
a414c77f
IE
7900 /* Mask case. */
7901 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 7902 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
7903 {
7904 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7905 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 7906 &dts[0], comp_vectype)
a414c77f
IE
7907 || !*comp_vectype
7908 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7909 return false;
7910 return true;
7911 }
7912
ebfd146a
IR
7913 if (!COMPARISON_CLASS_P (cond))
7914 return false;
7915
7916 lhs = TREE_OPERAND (cond, 0);
7917 rhs = TREE_OPERAND (cond, 1);
7918
7919 if (TREE_CODE (lhs) == SSA_NAME)
7920 {
355fe088 7921 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 7922 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
7923 return false;
7924 }
4fc5ebf1
JG
7925 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7926 || TREE_CODE (lhs) == FIXED_CST)
7927 dts[0] = vect_constant_def;
7928 else
ebfd146a
IR
7929 return false;
7930
7931 if (TREE_CODE (rhs) == SSA_NAME)
7932 {
355fe088 7933 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 7934 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
7935 return false;
7936 }
4fc5ebf1
JG
7937 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7938 || TREE_CODE (rhs) == FIXED_CST)
7939 dts[1] = vect_constant_def;
7940 else
ebfd146a
IR
7941 return false;
7942
28b33016 7943 if (vectype1 && vectype2
928686b1
RS
7944 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
7945 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
7946 return false;
7947
e9e1d143 7948 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
7949 /* Invariant comparison. */
7950 if (! *comp_vectype)
7951 {
7952 tree scalar_type = TREE_TYPE (lhs);
7953 /* If we can widen the comparison to match vectype do so. */
7954 if (INTEGRAL_TYPE_P (scalar_type)
7955 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7956 TYPE_SIZE (TREE_TYPE (vectype))))
7957 scalar_type = build_nonstandard_integer_type
7958 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7959 TYPE_UNSIGNED (scalar_type));
7960 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7961 }
7962
ebfd146a
IR
7963 return true;
7964}
7965
7966/* vectorizable_condition.
7967
b8698a0f
L
7968 Check if STMT is conditional modify expression that can be vectorized.
7969 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7970 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
7971 at GSI.
7972
7973 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7974 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 7975 else clause if it is 2).
ebfd146a
IR
7976
7977 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7978
4bbe8262 7979bool
355fe088
TS
7980vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7981 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 7982 slp_tree slp_node)
ebfd146a
IR
7983{
7984 tree scalar_dest = NULL_TREE;
7985 tree vec_dest = NULL_TREE;
01216d27
JJ
7986 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7987 tree then_clause, else_clause;
ebfd146a 7988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 7989 tree comp_vectype = NULL_TREE;
ff802fa1
IR
7990 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7991 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 7992 tree vec_compare;
ebfd146a
IR
7993 tree new_temp;
7994 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
7995 enum vect_def_type dts[4]
7996 = {vect_unknown_def_type, vect_unknown_def_type,
7997 vect_unknown_def_type, vect_unknown_def_type};
7998 int ndts = 4;
f7e531cf 7999 int ncopies;
01216d27 8000 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8001 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8002 int i, j;
8003 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8004 vec<tree> vec_oprnds0 = vNULL;
8005 vec<tree> vec_oprnds1 = vNULL;
8006 vec<tree> vec_oprnds2 = vNULL;
8007 vec<tree> vec_oprnds3 = vNULL;
74946978 8008 tree vec_cmp_type;
a414c77f 8009 bool masked = false;
b8698a0f 8010
f7e531cf
IR
8011 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8012 return false;
8013
af29617a
AH
8014 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
8015 {
8016 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8017 return false;
ebfd146a 8018
af29617a
AH
8019 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8020 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8021 && reduc_def))
8022 return false;
ebfd146a 8023
af29617a
AH
8024 /* FORNOW: not yet supported. */
8025 if (STMT_VINFO_LIVE_P (stmt_info))
8026 {
8027 if (dump_enabled_p ())
8028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8029 "value used after loop.\n");
8030 return false;
8031 }
ebfd146a
IR
8032 }
8033
8034 /* Is vectorizable conditional operation? */
8035 if (!is_gimple_assign (stmt))
8036 return false;
8037
8038 code = gimple_assign_rhs_code (stmt);
8039
8040 if (code != COND_EXPR)
8041 return false;
8042
465c8c19 8043 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8044 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8045
fce57248 8046 if (slp_node)
465c8c19
JJ
8047 ncopies = 1;
8048 else
e8f142e2 8049 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8050
8051 gcc_assert (ncopies >= 1);
8052 if (reduc_index && ncopies > 1)
8053 return false; /* FORNOW */
8054
4e71066d
RG
8055 cond_expr = gimple_assign_rhs1 (stmt);
8056 then_clause = gimple_assign_rhs2 (stmt);
8057 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8058
4fc5ebf1 8059 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 8060 &comp_vectype, &dts[0], vectype)
e9e1d143 8061 || !comp_vectype)
ebfd146a
IR
8062 return false;
8063
81c40241 8064 gimple *def_stmt;
4fc5ebf1 8065 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8066 &vectype1))
8067 return false;
4fc5ebf1 8068 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8069 &vectype2))
ebfd146a 8070 return false;
2947d3b2
IE
8071
8072 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8073 return false;
8074
8075 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8076 return false;
8077
28b33016
IE
8078 masked = !COMPARISON_CLASS_P (cond_expr);
8079 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8080
74946978
MP
8081 if (vec_cmp_type == NULL_TREE)
8082 return false;
784fb9b3 8083
01216d27
JJ
8084 cond_code = TREE_CODE (cond_expr);
8085 if (!masked)
8086 {
8087 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8088 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8089 }
8090
8091 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8092 {
8093 /* Boolean values may have another representation in vectors
8094 and therefore we prefer bit operations over comparison for
8095 them (which also works for scalar masks). We store opcodes
8096 to use in bitop1 and bitop2. Statement is vectorized as
8097 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8098 depending on bitop1 and bitop2 arity. */
8099 switch (cond_code)
8100 {
8101 case GT_EXPR:
8102 bitop1 = BIT_NOT_EXPR;
8103 bitop2 = BIT_AND_EXPR;
8104 break;
8105 case GE_EXPR:
8106 bitop1 = BIT_NOT_EXPR;
8107 bitop2 = BIT_IOR_EXPR;
8108 break;
8109 case LT_EXPR:
8110 bitop1 = BIT_NOT_EXPR;
8111 bitop2 = BIT_AND_EXPR;
8112 std::swap (cond_expr0, cond_expr1);
8113 break;
8114 case LE_EXPR:
8115 bitop1 = BIT_NOT_EXPR;
8116 bitop2 = BIT_IOR_EXPR;
8117 std::swap (cond_expr0, cond_expr1);
8118 break;
8119 case NE_EXPR:
8120 bitop1 = BIT_XOR_EXPR;
8121 break;
8122 case EQ_EXPR:
8123 bitop1 = BIT_XOR_EXPR;
8124 bitop2 = BIT_NOT_EXPR;
8125 break;
8126 default:
8127 return false;
8128 }
8129 cond_code = SSA_NAME;
8130 }
8131
b8698a0f 8132 if (!vec_stmt)
ebfd146a
IR
8133 {
8134 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8135 if (bitop1 != NOP_EXPR)
8136 {
8137 machine_mode mode = TYPE_MODE (comp_vectype);
8138 optab optab;
8139
8140 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8141 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8142 return false;
8143
8144 if (bitop2 != NOP_EXPR)
8145 {
8146 optab = optab_for_tree_code (bitop2, comp_vectype,
8147 optab_default);
8148 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8149 return false;
8150 }
8151 }
4fc5ebf1
JG
8152 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8153 cond_code))
8154 {
8155 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8156 return true;
8157 }
8158 return false;
ebfd146a
IR
8159 }
8160
f7e531cf
IR
8161 /* Transform. */
8162
8163 if (!slp_node)
8164 {
9771b263
DN
8165 vec_oprnds0.create (1);
8166 vec_oprnds1.create (1);
8167 vec_oprnds2.create (1);
8168 vec_oprnds3.create (1);
f7e531cf 8169 }
ebfd146a
IR
8170
8171 /* Handle def. */
8172 scalar_dest = gimple_assign_lhs (stmt);
8173 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8174
8175 /* Handle cond expr. */
a855b1b1
MM
8176 for (j = 0; j < ncopies; j++)
8177 {
538dd0b7 8178 gassign *new_stmt = NULL;
a855b1b1
MM
8179 if (j == 0)
8180 {
f7e531cf
IR
8181 if (slp_node)
8182 {
00f96dc9
TS
8183 auto_vec<tree, 4> ops;
8184 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8185
a414c77f 8186 if (masked)
01216d27 8187 ops.safe_push (cond_expr);
a414c77f
IE
8188 else
8189 {
01216d27
JJ
8190 ops.safe_push (cond_expr0);
8191 ops.safe_push (cond_expr1);
a414c77f 8192 }
9771b263
DN
8193 ops.safe_push (then_clause);
8194 ops.safe_push (else_clause);
306b0c92 8195 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8196 vec_oprnds3 = vec_defs.pop ();
8197 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8198 if (!masked)
8199 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8200 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8201 }
8202 else
8203 {
355fe088 8204 gimple *gtemp;
a414c77f
IE
8205 if (masked)
8206 {
8207 vec_cond_lhs
8208 = vect_get_vec_def_for_operand (cond_expr, stmt,
8209 comp_vectype);
8210 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8211 &gtemp, &dts[0]);
8212 }
8213 else
8214 {
01216d27
JJ
8215 vec_cond_lhs
8216 = vect_get_vec_def_for_operand (cond_expr0,
8217 stmt, comp_vectype);
8218 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8219
8220 vec_cond_rhs
8221 = vect_get_vec_def_for_operand (cond_expr1,
8222 stmt, comp_vectype);
8223 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8224 }
f7e531cf
IR
8225 if (reduc_index == 1)
8226 vec_then_clause = reduc_def;
8227 else
8228 {
8229 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8230 stmt);
8231 vect_is_simple_use (then_clause, loop_vinfo,
8232 &gtemp, &dts[2]);
f7e531cf
IR
8233 }
8234 if (reduc_index == 2)
8235 vec_else_clause = reduc_def;
8236 else
8237 {
8238 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8239 stmt);
8240 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8241 }
a855b1b1
MM
8242 }
8243 }
8244 else
8245 {
a414c77f
IE
8246 vec_cond_lhs
8247 = vect_get_vec_def_for_stmt_copy (dts[0],
8248 vec_oprnds0.pop ());
8249 if (!masked)
8250 vec_cond_rhs
8251 = vect_get_vec_def_for_stmt_copy (dts[1],
8252 vec_oprnds1.pop ());
8253
a855b1b1 8254 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8255 vec_oprnds2.pop ());
a855b1b1 8256 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8257 vec_oprnds3.pop ());
f7e531cf
IR
8258 }
8259
8260 if (!slp_node)
8261 {
9771b263 8262 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8263 if (!masked)
8264 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8265 vec_oprnds2.quick_push (vec_then_clause);
8266 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8267 }
8268
9dc3f7de 8269 /* Arguments are ready. Create the new vector stmt. */
9771b263 8270 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8271 {
9771b263
DN
8272 vec_then_clause = vec_oprnds2[i];
8273 vec_else_clause = vec_oprnds3[i];
a855b1b1 8274
a414c77f
IE
8275 if (masked)
8276 vec_compare = vec_cond_lhs;
8277 else
8278 {
8279 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8280 if (bitop1 == NOP_EXPR)
8281 vec_compare = build2 (cond_code, vec_cmp_type,
8282 vec_cond_lhs, vec_cond_rhs);
8283 else
8284 {
8285 new_temp = make_ssa_name (vec_cmp_type);
8286 if (bitop1 == BIT_NOT_EXPR)
8287 new_stmt = gimple_build_assign (new_temp, bitop1,
8288 vec_cond_rhs);
8289 else
8290 new_stmt
8291 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8292 vec_cond_rhs);
8293 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8294 if (bitop2 == NOP_EXPR)
8295 vec_compare = new_temp;
8296 else if (bitop2 == BIT_NOT_EXPR)
8297 {
8298 /* Instead of doing ~x ? y : z do x ? z : y. */
8299 vec_compare = new_temp;
8300 std::swap (vec_then_clause, vec_else_clause);
8301 }
8302 else
8303 {
8304 vec_compare = make_ssa_name (vec_cmp_type);
8305 new_stmt
8306 = gimple_build_assign (vec_compare, bitop2,
8307 vec_cond_lhs, new_temp);
8308 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8309 }
8310 }
a414c77f 8311 }
5958f9e2
JJ
8312 new_temp = make_ssa_name (vec_dest);
8313 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8314 vec_compare, vec_then_clause,
8315 vec_else_clause);
f7e531cf
IR
8316 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8317 if (slp_node)
9771b263 8318 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8319 }
8320
8321 if (slp_node)
8322 continue;
8323
8324 if (j == 0)
8325 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8326 else
8327 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8328
8329 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8330 }
b8698a0f 8331
9771b263
DN
8332 vec_oprnds0.release ();
8333 vec_oprnds1.release ();
8334 vec_oprnds2.release ();
8335 vec_oprnds3.release ();
f7e531cf 8336
ebfd146a
IR
8337 return true;
8338}
8339
42fd8198
IE
8340/* vectorizable_comparison.
8341
8342 Check if STMT is comparison expression that can be vectorized.
8343 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8344 comparison, put it in VEC_STMT, and insert it at GSI.
8345
8346 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8347
fce57248 8348static bool
42fd8198
IE
8349vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8350 gimple **vec_stmt, tree reduc_def,
8351 slp_tree slp_node)
8352{
8353 tree lhs, rhs1, rhs2;
8354 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8355 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8356 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8357 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8358 tree new_temp;
8359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8360 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8361 int ndts = 2;
928686b1 8362 poly_uint64 nunits;
42fd8198 8363 int ncopies;
49e76ff1 8364 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8365 stmt_vec_info prev_stmt_info = NULL;
8366 int i, j;
8367 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8368 vec<tree> vec_oprnds0 = vNULL;
8369 vec<tree> vec_oprnds1 = vNULL;
8370 gimple *def_stmt;
8371 tree mask_type;
8372 tree mask;
8373
c245362b
IE
8374 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8375 return false;
8376
30480bcd 8377 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8378 return false;
8379
8380 mask_type = vectype;
8381 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8382
fce57248 8383 if (slp_node)
42fd8198
IE
8384 ncopies = 1;
8385 else
e8f142e2 8386 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8387
8388 gcc_assert (ncopies >= 1);
42fd8198
IE
8389 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8390 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8391 && reduc_def))
8392 return false;
8393
8394 if (STMT_VINFO_LIVE_P (stmt_info))
8395 {
8396 if (dump_enabled_p ())
8397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8398 "value used after loop.\n");
8399 return false;
8400 }
8401
8402 if (!is_gimple_assign (stmt))
8403 return false;
8404
8405 code = gimple_assign_rhs_code (stmt);
8406
8407 if (TREE_CODE_CLASS (code) != tcc_comparison)
8408 return false;
8409
8410 rhs1 = gimple_assign_rhs1 (stmt);
8411 rhs2 = gimple_assign_rhs2 (stmt);
8412
8413 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8414 &dts[0], &vectype1))
8415 return false;
8416
8417 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8418 &dts[1], &vectype2))
8419 return false;
8420
8421 if (vectype1 && vectype2
928686b1
RS
8422 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8423 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
8424 return false;
8425
8426 vectype = vectype1 ? vectype1 : vectype2;
8427
8428 /* Invariant comparison. */
8429 if (!vectype)
8430 {
69a9a66f 8431 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 8432 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
8433 return false;
8434 }
928686b1 8435 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
8436 return false;
8437
49e76ff1
IE
8438 /* Can't compare mask and non-mask types. */
8439 if (vectype1 && vectype2
8440 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8441 return false;
8442
8443 /* Boolean values may have another representation in vectors
8444 and therefore we prefer bit operations over comparison for
8445 them (which also works for scalar masks). We store opcodes
8446 to use in bitop1 and bitop2. Statement is vectorized as
8447 BITOP2 (rhs1 BITOP1 rhs2) or
8448 rhs1 BITOP2 (BITOP1 rhs2)
8449 depending on bitop1 and bitop2 arity. */
8450 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8451 {
8452 if (code == GT_EXPR)
8453 {
8454 bitop1 = BIT_NOT_EXPR;
8455 bitop2 = BIT_AND_EXPR;
8456 }
8457 else if (code == GE_EXPR)
8458 {
8459 bitop1 = BIT_NOT_EXPR;
8460 bitop2 = BIT_IOR_EXPR;
8461 }
8462 else if (code == LT_EXPR)
8463 {
8464 bitop1 = BIT_NOT_EXPR;
8465 bitop2 = BIT_AND_EXPR;
8466 std::swap (rhs1, rhs2);
264d951a 8467 std::swap (dts[0], dts[1]);
49e76ff1
IE
8468 }
8469 else if (code == LE_EXPR)
8470 {
8471 bitop1 = BIT_NOT_EXPR;
8472 bitop2 = BIT_IOR_EXPR;
8473 std::swap (rhs1, rhs2);
264d951a 8474 std::swap (dts[0], dts[1]);
49e76ff1
IE
8475 }
8476 else
8477 {
8478 bitop1 = BIT_XOR_EXPR;
8479 if (code == EQ_EXPR)
8480 bitop2 = BIT_NOT_EXPR;
8481 }
8482 }
8483
42fd8198
IE
8484 if (!vec_stmt)
8485 {
8486 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
49e76ff1 8487 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
4fc5ebf1 8488 dts, ndts, NULL, NULL);
49e76ff1 8489 if (bitop1 == NOP_EXPR)
96592eed 8490 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
8491 else
8492 {
8493 machine_mode mode = TYPE_MODE (vectype);
8494 optab optab;
8495
8496 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8497 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8498 return false;
8499
8500 if (bitop2 != NOP_EXPR)
8501 {
8502 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8503 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8504 return false;
8505 }
8506 return true;
8507 }
42fd8198
IE
8508 }
8509
8510 /* Transform. */
8511 if (!slp_node)
8512 {
8513 vec_oprnds0.create (1);
8514 vec_oprnds1.create (1);
8515 }
8516
8517 /* Handle def. */
8518 lhs = gimple_assign_lhs (stmt);
8519 mask = vect_create_destination_var (lhs, mask_type);
8520
8521 /* Handle cmp expr. */
8522 for (j = 0; j < ncopies; j++)
8523 {
8524 gassign *new_stmt = NULL;
8525 if (j == 0)
8526 {
8527 if (slp_node)
8528 {
8529 auto_vec<tree, 2> ops;
8530 auto_vec<vec<tree>, 2> vec_defs;
8531
8532 ops.safe_push (rhs1);
8533 ops.safe_push (rhs2);
306b0c92 8534 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
8535 vec_oprnds1 = vec_defs.pop ();
8536 vec_oprnds0 = vec_defs.pop ();
8537 }
8538 else
8539 {
e4af0bc4
IE
8540 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8541 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
8542 }
8543 }
8544 else
8545 {
8546 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8547 vec_oprnds0.pop ());
8548 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8549 vec_oprnds1.pop ());
8550 }
8551
8552 if (!slp_node)
8553 {
8554 vec_oprnds0.quick_push (vec_rhs1);
8555 vec_oprnds1.quick_push (vec_rhs2);
8556 }
8557
8558 /* Arguments are ready. Create the new vector stmt. */
8559 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8560 {
8561 vec_rhs2 = vec_oprnds1[i];
8562
8563 new_temp = make_ssa_name (mask);
49e76ff1
IE
8564 if (bitop1 == NOP_EXPR)
8565 {
8566 new_stmt = gimple_build_assign (new_temp, code,
8567 vec_rhs1, vec_rhs2);
8568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8569 }
8570 else
8571 {
8572 if (bitop1 == BIT_NOT_EXPR)
8573 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8574 else
8575 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8576 vec_rhs2);
8577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8578 if (bitop2 != NOP_EXPR)
8579 {
8580 tree res = make_ssa_name (mask);
8581 if (bitop2 == BIT_NOT_EXPR)
8582 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8583 else
8584 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8585 new_temp);
8586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8587 }
8588 }
42fd8198
IE
8589 if (slp_node)
8590 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8591 }
8592
8593 if (slp_node)
8594 continue;
8595
8596 if (j == 0)
8597 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8598 else
8599 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8600
8601 prev_stmt_info = vinfo_for_stmt (new_stmt);
8602 }
8603
8604 vec_oprnds0.release ();
8605 vec_oprnds1.release ();
8606
8607 return true;
8608}
ebfd146a 8609
68a0f2ff
RS
8610/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8611 can handle all live statements in the node. Otherwise return true
8612 if STMT is not live or if vectorizable_live_operation can handle it.
8613 GSI and VEC_STMT are as for vectorizable_live_operation. */
8614
8615static bool
8616can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8617 slp_tree slp_node, gimple **vec_stmt)
8618{
8619 if (slp_node)
8620 {
8621 gimple *slp_stmt;
8622 unsigned int i;
8623 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8624 {
8625 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8626 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8627 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8628 vec_stmt))
8629 return false;
8630 }
8631 }
8632 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8633 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8634 return false;
8635
8636 return true;
8637}
8638
8644a673 8639/* Make sure the statement is vectorizable. */
ebfd146a
IR
8640
8641bool
891ad31c
RB
8642vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8643 slp_instance node_instance)
ebfd146a 8644{
8644a673 8645 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 8646 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 8647 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 8648 bool ok;
355fe088 8649 gimple *pattern_stmt;
363477c0 8650 gimple_seq pattern_def_seq;
ebfd146a 8651
73fbfcad 8652 if (dump_enabled_p ())
ebfd146a 8653 {
78c60e3d
SS
8654 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 8656 }
ebfd146a 8657
1825a1f3 8658 if (gimple_has_volatile_ops (stmt))
b8698a0f 8659 {
73fbfcad 8660 if (dump_enabled_p ())
78c60e3d 8661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8662 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
8663
8664 return false;
8665 }
b8698a0f
L
8666
8667 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
8668 to include:
8669 - the COND_EXPR which is the loop exit condition
8670 - any LABEL_EXPRs in the loop
b8698a0f 8671 - computations that are used only for array indexing or loop control.
8644a673 8672 In basic blocks we only analyze statements that are a part of some SLP
83197f37 8673 instance, therefore, all the statements are relevant.
ebfd146a 8674
d092494c 8675 Pattern statement needs to be analyzed instead of the original statement
83197f37 8676 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
8677 statements. In basic blocks we are called from some SLP instance
8678 traversal, don't analyze pattern stmts instead, the pattern stmts
8679 already will be part of SLP instance. */
83197f37
IR
8680
8681 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 8682 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 8683 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 8684 {
9d5e7640 8685 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 8686 && pattern_stmt
9d5e7640
IR
8687 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8688 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8689 {
83197f37 8690 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
8691 stmt = pattern_stmt;
8692 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 8693 if (dump_enabled_p ())
9d5e7640 8694 {
78c60e3d
SS
8695 dump_printf_loc (MSG_NOTE, vect_location,
8696 "==> examining pattern statement: ");
8697 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
8698 }
8699 }
8700 else
8701 {
73fbfcad 8702 if (dump_enabled_p ())
e645e942 8703 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 8704
9d5e7640
IR
8705 return true;
8706 }
8644a673 8707 }
83197f37 8708 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 8709 && node == NULL
83197f37
IR
8710 && pattern_stmt
8711 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8712 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8713 {
8714 /* Analyze PATTERN_STMT too. */
73fbfcad 8715 if (dump_enabled_p ())
83197f37 8716 {
78c60e3d
SS
8717 dump_printf_loc (MSG_NOTE, vect_location,
8718 "==> examining pattern statement: ");
8719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
8720 }
8721
891ad31c
RB
8722 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8723 node_instance))
83197f37
IR
8724 return false;
8725 }
ebfd146a 8726
1107f3ae 8727 if (is_pattern_stmt_p (stmt_info)
079c527f 8728 && node == NULL
363477c0 8729 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8730 {
363477c0 8731 gimple_stmt_iterator si;
1107f3ae 8732
363477c0
JJ
8733 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8734 {
355fe088 8735 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
8736 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8737 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8738 {
8739 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8740 if (dump_enabled_p ())
363477c0 8741 {
78c60e3d
SS
8742 dump_printf_loc (MSG_NOTE, vect_location,
8743 "==> examining pattern def statement: ");
8744 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8745 }
1107f3ae 8746
363477c0 8747 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 8748 need_to_vectorize, node, node_instance))
363477c0
JJ
8749 return false;
8750 }
8751 }
8752 }
1107f3ae 8753
8644a673
IR
8754 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8755 {
8756 case vect_internal_def:
8757 break;
ebfd146a 8758
8644a673 8759 case vect_reduction_def:
7c5222ff 8760 case vect_nested_cycle:
14a61437
RB
8761 gcc_assert (!bb_vinfo
8762 && (relevance == vect_used_in_outer
8763 || relevance == vect_used_in_outer_by_reduction
8764 || relevance == vect_used_by_reduction
b28ead45
AH
8765 || relevance == vect_unused_in_scope
8766 || relevance == vect_used_only_live));
8644a673
IR
8767 break;
8768
8769 case vect_induction_def:
e7baeb39
RB
8770 gcc_assert (!bb_vinfo);
8771 break;
8772
8644a673
IR
8773 case vect_constant_def:
8774 case vect_external_def:
8775 case vect_unknown_def_type:
8776 default:
8777 gcc_unreachable ();
8778 }
ebfd146a 8779
8644a673 8780 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 8781 {
8644a673 8782 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
8783 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8784 || (is_gimple_call (stmt)
8785 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 8786 *need_to_vectorize = true;
ebfd146a
IR
8787 }
8788
b1af7da6
RB
8789 if (PURE_SLP_STMT (stmt_info) && !node)
8790 {
8791 dump_printf_loc (MSG_NOTE, vect_location,
8792 "handled only by SLP analysis\n");
8793 return true;
8794 }
8795
8796 ok = true;
8797 if (!bb_vinfo
8798 && (STMT_VINFO_RELEVANT_P (stmt_info)
8799 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8800 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8801 || vectorizable_conversion (stmt, NULL, NULL, node)
8802 || vectorizable_shift (stmt, NULL, NULL, node)
8803 || vectorizable_operation (stmt, NULL, NULL, node)
8804 || vectorizable_assignment (stmt, NULL, NULL, node)
8805 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8806 || vectorizable_call (stmt, NULL, NULL, node)
8807 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 8808 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 8809 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
8810 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8811 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
8812 else
8813 {
8814 if (bb_vinfo)
8815 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8816 || vectorizable_conversion (stmt, NULL, NULL, node)
8817 || vectorizable_shift (stmt, NULL, NULL, node)
8818 || vectorizable_operation (stmt, NULL, NULL, node)
8819 || vectorizable_assignment (stmt, NULL, NULL, node)
8820 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8821 || vectorizable_call (stmt, NULL, NULL, node)
8822 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
8823 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8824 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 8825 }
8644a673
IR
8826
8827 if (!ok)
ebfd146a 8828 {
73fbfcad 8829 if (dump_enabled_p ())
8644a673 8830 {
78c60e3d
SS
8831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8832 "not vectorized: relevant stmt not ");
8833 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8834 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8835 }
b8698a0f 8836
ebfd146a
IR
8837 return false;
8838 }
8839
a70d6342
IR
8840 if (bb_vinfo)
8841 return true;
8842
8644a673
IR
8843 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8844 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
8845 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8846 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 8847 {
73fbfcad 8848 if (dump_enabled_p ())
8644a673 8849 {
78c60e3d 8850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 8851 "not vectorized: live stmt not supported: ");
78c60e3d 8852 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8853 }
b8698a0f 8854
8644a673 8855 return false;
ebfd146a
IR
8856 }
8857
ebfd146a
IR
8858 return true;
8859}
8860
8861
8862/* Function vect_transform_stmt.
8863
8864 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8865
8866bool
355fe088 8867vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 8868 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
8869 slp_instance slp_node_instance)
8870{
8871 bool is_store = false;
355fe088 8872 gimple *vec_stmt = NULL;
ebfd146a 8873 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 8874 bool done;
ebfd146a 8875
fce57248 8876 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 8877 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 8878
ebfd146a
IR
8879 switch (STMT_VINFO_TYPE (stmt_info))
8880 {
8881 case type_demotion_vec_info_type:
ebfd146a 8882 case type_promotion_vec_info_type:
ebfd146a
IR
8883 case type_conversion_vec_info_type:
8884 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8885 gcc_assert (done);
8886 break;
8887
8888 case induc_vec_info_type:
e7baeb39 8889 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
8890 gcc_assert (done);
8891 break;
8892
9dc3f7de
IR
8893 case shift_vec_info_type:
8894 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8895 gcc_assert (done);
8896 break;
8897
ebfd146a
IR
8898 case op_vec_info_type:
8899 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8900 gcc_assert (done);
8901 break;
8902
8903 case assignment_vec_info_type:
8904 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8905 gcc_assert (done);
8906 break;
8907
8908 case load_vec_info_type:
b8698a0f 8909 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
8910 slp_node_instance);
8911 gcc_assert (done);
8912 break;
8913
8914 case store_vec_info_type:
8915 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8916 gcc_assert (done);
0d0293ac 8917 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
8918 {
8919 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 8920 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
8921 one are skipped, and there vec_stmt_info shouldn't be freed
8922 meanwhile. */
0d0293ac 8923 *grouped_store = true;
ebfd146a
IR
8924 if (STMT_VINFO_VEC_STMT (stmt_info))
8925 is_store = true;
8926 }
8927 else
8928 is_store = true;
8929 break;
8930
8931 case condition_vec_info_type:
f7e531cf 8932 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
8933 gcc_assert (done);
8934 break;
8935
42fd8198
IE
8936 case comparison_vec_info_type:
8937 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8938 gcc_assert (done);
8939 break;
8940
ebfd146a 8941 case call_vec_info_type:
190c2236 8942 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 8943 stmt = gsi_stmt (*gsi);
8e4284d0 8944 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
5ce9450f 8945 is_store = true;
ebfd146a
IR
8946 break;
8947
0136f8f0
AH
8948 case call_simd_clone_vec_info_type:
8949 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8950 stmt = gsi_stmt (*gsi);
8951 break;
8952
ebfd146a 8953 case reduc_vec_info_type:
891ad31c
RB
8954 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8955 slp_node_instance);
ebfd146a
IR
8956 gcc_assert (done);
8957 break;
8958
8959 default:
8960 if (!STMT_VINFO_LIVE_P (stmt_info))
8961 {
73fbfcad 8962 if (dump_enabled_p ())
78c60e3d 8963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8964 "stmt not supported.\n");
ebfd146a
IR
8965 gcc_unreachable ();
8966 }
8967 }
8968
225ce44b
RB
8969 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8970 This would break hybrid SLP vectorization. */
8971 if (slp_node)
d90f8440
RB
8972 gcc_assert (!vec_stmt
8973 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 8974
ebfd146a
IR
8975 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8976 is being vectorized, but outside the immediately enclosing loop. */
8977 if (vec_stmt
a70d6342
IR
8978 && STMT_VINFO_LOOP_VINFO (stmt_info)
8979 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8980 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
8981 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8982 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 8983 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 8984 vect_used_in_outer_by_reduction))
ebfd146a 8985 {
a70d6342
IR
8986 struct loop *innerloop = LOOP_VINFO_LOOP (
8987 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
8988 imm_use_iterator imm_iter;
8989 use_operand_p use_p;
8990 tree scalar_dest;
355fe088 8991 gimple *exit_phi;
ebfd146a 8992
73fbfcad 8993 if (dump_enabled_p ())
78c60e3d 8994 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 8995 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
8996
8997 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8998 (to be used when vectorizing outer-loop stmts that use the DEF of
8999 STMT). */
9000 if (gimple_code (stmt) == GIMPLE_PHI)
9001 scalar_dest = PHI_RESULT (stmt);
9002 else
9003 scalar_dest = gimple_assign_lhs (stmt);
9004
9005 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9006 {
9007 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9008 {
9009 exit_phi = USE_STMT (use_p);
9010 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9011 }
9012 }
9013 }
9014
9015 /* Handle stmts whose DEF is used outside the loop-nest that is
9016 being vectorized. */
68a0f2ff 9017 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9018 {
68a0f2ff 9019 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
ebfd146a
IR
9020 gcc_assert (done);
9021 }
9022
9023 if (vec_stmt)
83197f37 9024 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9025
b8698a0f 9026 return is_store;
ebfd146a
IR
9027}
9028
9029
b8698a0f 9030/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9031 stmt_vec_info. */
9032
9033void
355fe088 9034vect_remove_stores (gimple *first_stmt)
ebfd146a 9035{
355fe088
TS
9036 gimple *next = first_stmt;
9037 gimple *tmp;
ebfd146a
IR
9038 gimple_stmt_iterator next_si;
9039
9040 while (next)
9041 {
78048b1c
JJ
9042 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9043
9044 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9045 if (is_pattern_stmt_p (stmt_info))
9046 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9047 /* Free the attached stmt_vec_info and remove the stmt. */
9048 next_si = gsi_for_stmt (next);
3d3f2249 9049 unlink_stmt_vdef (next);
ebfd146a 9050 gsi_remove (&next_si, true);
3d3f2249 9051 release_defs (next);
ebfd146a
IR
9052 free_stmt_vec_info (next);
9053 next = tmp;
9054 }
9055}
9056
9057
9058/* Function new_stmt_vec_info.
9059
9060 Create and initialize a new stmt_vec_info struct for STMT. */
9061
9062stmt_vec_info
310213d4 9063new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9064{
9065 stmt_vec_info res;
9066 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9067
9068 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9069 STMT_VINFO_STMT (res) = stmt;
310213d4 9070 res->vinfo = vinfo;
8644a673 9071 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9072 STMT_VINFO_LIVE_P (res) = false;
9073 STMT_VINFO_VECTYPE (res) = NULL;
9074 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9075 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9076 STMT_VINFO_IN_PATTERN_P (res) = false;
9077 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9078 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9079 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9080 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9081 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9082
ebfd146a
IR
9083 if (gimple_code (stmt) == GIMPLE_PHI
9084 && is_loop_header_bb_p (gimple_bb (stmt)))
9085 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9086 else
8644a673
IR
9087 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9088
9771b263 9089 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9090 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9091 STMT_VINFO_NUM_SLP_USES (res) = 0;
9092
e14c1050
IR
9093 GROUP_FIRST_ELEMENT (res) = NULL;
9094 GROUP_NEXT_ELEMENT (res) = NULL;
9095 GROUP_SIZE (res) = 0;
9096 GROUP_STORE_COUNT (res) = 0;
9097 GROUP_GAP (res) = 0;
9098 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
9099
9100 return res;
9101}
9102
9103
9104/* Create a hash table for stmt_vec_info. */
9105
9106void
9107init_stmt_vec_info_vec (void)
9108{
9771b263
DN
9109 gcc_assert (!stmt_vec_info_vec.exists ());
9110 stmt_vec_info_vec.create (50);
ebfd146a
IR
9111}
9112
9113
9114/* Free hash table for stmt_vec_info. */
9115
9116void
9117free_stmt_vec_info_vec (void)
9118{
93675444 9119 unsigned int i;
3161455c 9120 stmt_vec_info info;
93675444
JJ
9121 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9122 if (info != NULL)
3161455c 9123 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
9124 gcc_assert (stmt_vec_info_vec.exists ());
9125 stmt_vec_info_vec.release ();
ebfd146a
IR
9126}
9127
9128
9129/* Free stmt vectorization related info. */
9130
9131void
355fe088 9132free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9133{
9134 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9135
9136 if (!stmt_info)
9137 return;
9138
78048b1c
JJ
9139 /* Check if this statement has a related "pattern stmt"
9140 (introduced by the vectorizer during the pattern recognition
9141 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9142 too. */
9143 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9144 {
9145 stmt_vec_info patt_info
9146 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9147 if (patt_info)
9148 {
363477c0 9149 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9150 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9151 gimple_set_bb (patt_stmt, NULL);
9152 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9153 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9154 release_ssa_name (lhs);
363477c0
JJ
9155 if (seq)
9156 {
9157 gimple_stmt_iterator si;
9158 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9159 {
355fe088 9160 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9161 gimple_set_bb (seq_stmt, NULL);
7532abf2 9162 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9163 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9164 release_ssa_name (lhs);
9165 free_stmt_vec_info (seq_stmt);
9166 }
363477c0 9167 }
f0281fde 9168 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9169 }
9170 }
9171
9771b263 9172 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9173 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9174 set_vinfo_for_stmt (stmt, NULL);
9175 free (stmt_info);
9176}
9177
9178
bb67d9c7 9179/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9180
bb67d9c7 9181 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9182 by the target. */
9183
bb67d9c7 9184static tree
86e36728 9185get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9186{
c7d97b28 9187 tree orig_scalar_type = scalar_type;
3bd8f481 9188 scalar_mode inner_mode;
ef4bddc2 9189 machine_mode simd_mode;
86e36728 9190 poly_uint64 nunits;
ebfd146a
IR
9191 tree vectype;
9192
3bd8f481
RS
9193 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9194 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9195 return NULL_TREE;
9196
3bd8f481 9197 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9198
7b7b1813
RG
9199 /* For vector types of elements whose mode precision doesn't
9200 match their types precision we use a element type of mode
9201 precision. The vectorization routines will have to make sure
48f2e373
RB
9202 they support the proper result truncation/extension.
9203 We also make sure to build vector types with INTEGER_TYPE
9204 component type only. */
6d7971b8 9205 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9206 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9207 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9208 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9209 TYPE_UNSIGNED (scalar_type));
6d7971b8 9210
ccbf5bb4
RG
9211 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9212 When the component mode passes the above test simply use a type
9213 corresponding to that mode. The theory is that any use that
9214 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9215 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9216 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9217 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9218
9219 /* We can't build a vector type of elements with alignment bigger than
9220 their size. */
dfc2e2ac 9221 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9222 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9223 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9224
dfc2e2ac
RB
9225 /* If we felt back to using the mode fail if there was
9226 no scalar type for it. */
9227 if (scalar_type == NULL_TREE)
9228 return NULL_TREE;
9229
bb67d9c7
RG
9230 /* If no size was supplied use the mode the target prefers. Otherwise
9231 lookup a vector mode of the specified size. */
86e36728 9232 if (known_eq (size, 0U))
bb67d9c7 9233 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9234 else if (!multiple_p (size, nbytes, &nunits)
9235 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9236 return NULL_TREE;
4c8fd8ac 9237 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9238 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9239 return NULL_TREE;
ebfd146a
IR
9240
9241 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9242
9243 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9244 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9245 return NULL_TREE;
ebfd146a 9246
c7d97b28
RB
9247 /* Re-attach the address-space qualifier if we canonicalized the scalar
9248 type. */
9249 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9250 return build_qualified_type
9251 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9252
ebfd146a
IR
9253 return vectype;
9254}
9255
86e36728 9256poly_uint64 current_vector_size;
bb67d9c7
RG
9257
9258/* Function get_vectype_for_scalar_type.
9259
9260 Returns the vector type corresponding to SCALAR_TYPE as supported
9261 by the target. */
9262
9263tree
9264get_vectype_for_scalar_type (tree scalar_type)
9265{
9266 tree vectype;
9267 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9268 current_vector_size);
9269 if (vectype
86e36728 9270 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9271 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9272 return vectype;
9273}
9274
42fd8198
IE
9275/* Function get_mask_type_for_scalar_type.
9276
9277 Returns the mask type corresponding to a result of comparison
9278 of vectors of specified SCALAR_TYPE as supported by target. */
9279
9280tree
9281get_mask_type_for_scalar_type (tree scalar_type)
9282{
9283 tree vectype = get_vectype_for_scalar_type (scalar_type);
9284
9285 if (!vectype)
9286 return NULL;
9287
9288 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9289 current_vector_size);
9290}
9291
b690cc0f
RG
9292/* Function get_same_sized_vectype
9293
9294 Returns a vector type corresponding to SCALAR_TYPE of size
9295 VECTOR_TYPE if supported by the target. */
9296
9297tree
bb67d9c7 9298get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 9299{
2568d8a1 9300 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
9301 return build_same_sized_truth_vector_type (vector_type);
9302
bb67d9c7
RG
9303 return get_vectype_for_scalar_type_and_size
9304 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
9305}
9306
ebfd146a
IR
9307/* Function vect_is_simple_use.
9308
9309 Input:
81c40241
RB
9310 VINFO - the vect info of the loop or basic block that is being vectorized.
9311 OPERAND - operand in the loop or bb.
9312 Output:
9313 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9314 DT - the type of definition
ebfd146a
IR
9315
9316 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 9317 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 9318 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 9319 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
9320 is the case in reduction/induction computations).
9321 For basic blocks, supportable operands are constants and bb invariants.
9322 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
9323
9324bool
81c40241
RB
9325vect_is_simple_use (tree operand, vec_info *vinfo,
9326 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 9327{
ebfd146a 9328 *def_stmt = NULL;
3fc356dc 9329 *dt = vect_unknown_def_type;
b8698a0f 9330
73fbfcad 9331 if (dump_enabled_p ())
ebfd146a 9332 {
78c60e3d
SS
9333 dump_printf_loc (MSG_NOTE, vect_location,
9334 "vect_is_simple_use: operand ");
9335 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 9336 dump_printf (MSG_NOTE, "\n");
ebfd146a 9337 }
b8698a0f 9338
b758f602 9339 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
9340 {
9341 *dt = vect_constant_def;
9342 return true;
9343 }
b8698a0f 9344
ebfd146a
IR
9345 if (is_gimple_min_invariant (operand))
9346 {
8644a673 9347 *dt = vect_external_def;
ebfd146a
IR
9348 return true;
9349 }
9350
ebfd146a
IR
9351 if (TREE_CODE (operand) != SSA_NAME)
9352 {
73fbfcad 9353 if (dump_enabled_p ())
af29617a
AH
9354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9355 "not ssa-name.\n");
ebfd146a
IR
9356 return false;
9357 }
b8698a0f 9358
3fc356dc 9359 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 9360 {
3fc356dc
RB
9361 *dt = vect_external_def;
9362 return true;
ebfd146a
IR
9363 }
9364
3fc356dc 9365 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 9366 if (dump_enabled_p ())
ebfd146a 9367 {
78c60e3d
SS
9368 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9369 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
9370 }
9371
61d371eb 9372 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 9373 *dt = vect_external_def;
ebfd146a
IR
9374 else
9375 {
3fc356dc 9376 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 9377 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
9378 }
9379
2e8ab70c
RB
9380 if (dump_enabled_p ())
9381 {
9382 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9383 switch (*dt)
9384 {
9385 case vect_uninitialized_def:
9386 dump_printf (MSG_NOTE, "uninitialized\n");
9387 break;
9388 case vect_constant_def:
9389 dump_printf (MSG_NOTE, "constant\n");
9390 break;
9391 case vect_external_def:
9392 dump_printf (MSG_NOTE, "external\n");
9393 break;
9394 case vect_internal_def:
9395 dump_printf (MSG_NOTE, "internal\n");
9396 break;
9397 case vect_induction_def:
9398 dump_printf (MSG_NOTE, "induction\n");
9399 break;
9400 case vect_reduction_def:
9401 dump_printf (MSG_NOTE, "reduction\n");
9402 break;
9403 case vect_double_reduction_def:
9404 dump_printf (MSG_NOTE, "double reduction\n");
9405 break;
9406 case vect_nested_cycle:
9407 dump_printf (MSG_NOTE, "nested cycle\n");
9408 break;
9409 case vect_unknown_def_type:
9410 dump_printf (MSG_NOTE, "unknown\n");
9411 break;
9412 }
9413 }
9414
81c40241 9415 if (*dt == vect_unknown_def_type)
ebfd146a 9416 {
73fbfcad 9417 if (dump_enabled_p ())
78c60e3d 9418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9419 "Unsupported pattern.\n");
ebfd146a
IR
9420 return false;
9421 }
9422
ebfd146a
IR
9423 switch (gimple_code (*def_stmt))
9424 {
9425 case GIMPLE_PHI:
ebfd146a 9426 case GIMPLE_ASSIGN:
ebfd146a 9427 case GIMPLE_CALL:
81c40241 9428 break;
ebfd146a 9429 default:
73fbfcad 9430 if (dump_enabled_p ())
78c60e3d 9431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9432 "unsupported defining stmt:\n");
ebfd146a
IR
9433 return false;
9434 }
9435
9436 return true;
9437}
9438
81c40241 9439/* Function vect_is_simple_use.
b690cc0f 9440
81c40241 9441 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
9442 type of OPERAND and stores it to *VECTYPE. If the definition of
9443 OPERAND is vect_uninitialized_def, vect_constant_def or
9444 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9445 is responsible to compute the best suited vector type for the
9446 scalar operand. */
9447
9448bool
81c40241
RB
9449vect_is_simple_use (tree operand, vec_info *vinfo,
9450 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 9451{
81c40241 9452 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
9453 return false;
9454
9455 /* Now get a vector type if the def is internal, otherwise supply
9456 NULL_TREE and leave it up to the caller to figure out a proper
9457 type for the use stmt. */
9458 if (*dt == vect_internal_def
9459 || *dt == vect_induction_def
9460 || *dt == vect_reduction_def
9461 || *dt == vect_double_reduction_def
9462 || *dt == vect_nested_cycle)
9463 {
9464 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
9465
9466 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9467 && !STMT_VINFO_RELEVANT (stmt_info)
9468 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 9469 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 9470
b690cc0f
RG
9471 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9472 gcc_assert (*vectype != NULL_TREE);
9473 }
9474 else if (*dt == vect_uninitialized_def
9475 || *dt == vect_constant_def
9476 || *dt == vect_external_def)
9477 *vectype = NULL_TREE;
9478 else
9479 gcc_unreachable ();
9480
9481 return true;
9482}
9483
ebfd146a
IR
9484
9485/* Function supportable_widening_operation
9486
b8698a0f
L
9487 Check whether an operation represented by the code CODE is a
9488 widening operation that is supported by the target platform in
b690cc0f
RG
9489 vector form (i.e., when operating on arguments of type VECTYPE_IN
9490 producing a result of type VECTYPE_OUT).
b8698a0f 9491
ebfd146a
IR
9492 Widening operations we currently support are NOP (CONVERT), FLOAT
9493 and WIDEN_MULT. This function checks if these operations are supported
9494 by the target platform either directly (via vector tree-codes), or via
9495 target builtins.
9496
9497 Output:
b8698a0f
L
9498 - CODE1 and CODE2 are codes of vector operations to be used when
9499 vectorizing the operation, if available.
ebfd146a
IR
9500 - MULTI_STEP_CVT determines the number of required intermediate steps in
9501 case of multi-step conversion (like char->short->int - in that case
9502 MULTI_STEP_CVT will be 1).
b8698a0f
L
9503 - INTERM_TYPES contains the intermediate type required to perform the
9504 widening operation (short in the above example). */
ebfd146a
IR
9505
9506bool
355fe088 9507supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 9508 tree vectype_out, tree vectype_in,
ebfd146a
IR
9509 enum tree_code *code1, enum tree_code *code2,
9510 int *multi_step_cvt,
9771b263 9511 vec<tree> *interm_types)
ebfd146a
IR
9512{
9513 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9514 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 9515 struct loop *vect_loop = NULL;
ef4bddc2 9516 machine_mode vec_mode;
81f40b79 9517 enum insn_code icode1, icode2;
ebfd146a 9518 optab optab1, optab2;
b690cc0f
RG
9519 tree vectype = vectype_in;
9520 tree wide_vectype = vectype_out;
ebfd146a 9521 enum tree_code c1, c2;
4a00c761
JJ
9522 int i;
9523 tree prev_type, intermediate_type;
ef4bddc2 9524 machine_mode intermediate_mode, prev_mode;
4a00c761 9525 optab optab3, optab4;
ebfd146a 9526
4a00c761 9527 *multi_step_cvt = 0;
4ef69dfc
IR
9528 if (loop_info)
9529 vect_loop = LOOP_VINFO_LOOP (loop_info);
9530
ebfd146a
IR
9531 switch (code)
9532 {
9533 case WIDEN_MULT_EXPR:
6ae6116f
RH
9534 /* The result of a vectorized widening operation usually requires
9535 two vectors (because the widened results do not fit into one vector).
9536 The generated vector results would normally be expected to be
9537 generated in the same order as in the original scalar computation,
9538 i.e. if 8 results are generated in each vector iteration, they are
9539 to be organized as follows:
9540 vect1: [res1,res2,res3,res4],
9541 vect2: [res5,res6,res7,res8].
9542
9543 However, in the special case that the result of the widening
9544 operation is used in a reduction computation only, the order doesn't
9545 matter (because when vectorizing a reduction we change the order of
9546 the computation). Some targets can take advantage of this and
9547 generate more efficient code. For example, targets like Altivec,
9548 that support widen_mult using a sequence of {mult_even,mult_odd}
9549 generate the following vectors:
9550 vect1: [res1,res3,res5,res7],
9551 vect2: [res2,res4,res6,res8].
9552
9553 When vectorizing outer-loops, we execute the inner-loop sequentially
9554 (each vectorized inner-loop iteration contributes to VF outer-loop
9555 iterations in parallel). We therefore don't allow to change the
9556 order of the computation in the inner-loop during outer-loop
9557 vectorization. */
9558 /* TODO: Another case in which order doesn't *really* matter is when we
9559 widen and then contract again, e.g. (short)((int)x * y >> 8).
9560 Normally, pack_trunc performs an even/odd permute, whereas the
9561 repack from an even/odd expansion would be an interleave, which
9562 would be significantly simpler for e.g. AVX2. */
9563 /* In any case, in order to avoid duplicating the code below, recurse
9564 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9565 are properly set up for the caller. If we fail, we'll continue with
9566 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9567 if (vect_loop
9568 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9569 && !nested_in_vect_loop_p (vect_loop, stmt)
9570 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9571 stmt, vectype_out, vectype_in,
a86ec597
RH
9572 code1, code2, multi_step_cvt,
9573 interm_types))
ebc047a2
CH
9574 {
9575 /* Elements in a vector with vect_used_by_reduction property cannot
9576 be reordered if the use chain with this property does not have the
9577 same operation. One such an example is s += a * b, where elements
9578 in a and b cannot be reordered. Here we check if the vector defined
9579 by STMT is only directly used in the reduction statement. */
9580 tree lhs = gimple_assign_lhs (stmt);
9581 use_operand_p dummy;
355fe088 9582 gimple *use_stmt;
ebc047a2
CH
9583 stmt_vec_info use_stmt_info = NULL;
9584 if (single_imm_use (lhs, &dummy, &use_stmt)
9585 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9586 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9587 return true;
9588 }
4a00c761
JJ
9589 c1 = VEC_WIDEN_MULT_LO_EXPR;
9590 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
9591 break;
9592
81c40241
RB
9593 case DOT_PROD_EXPR:
9594 c1 = DOT_PROD_EXPR;
9595 c2 = DOT_PROD_EXPR;
9596 break;
9597
9598 case SAD_EXPR:
9599 c1 = SAD_EXPR;
9600 c2 = SAD_EXPR;
9601 break;
9602
6ae6116f
RH
9603 case VEC_WIDEN_MULT_EVEN_EXPR:
9604 /* Support the recursion induced just above. */
9605 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9606 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9607 break;
9608
36ba4aae 9609 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
9610 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9611 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
9612 break;
9613
ebfd146a 9614 CASE_CONVERT:
4a00c761
JJ
9615 c1 = VEC_UNPACK_LO_EXPR;
9616 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
9617 break;
9618
9619 case FLOAT_EXPR:
4a00c761
JJ
9620 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9621 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
9622 break;
9623
9624 case FIX_TRUNC_EXPR:
9625 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9626 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9627 computing the operation. */
9628 return false;
9629
9630 default:
9631 gcc_unreachable ();
9632 }
9633
6ae6116f 9634 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 9635 std::swap (c1, c2);
4a00c761 9636
ebfd146a
IR
9637 if (code == FIX_TRUNC_EXPR)
9638 {
9639 /* The signedness is determined from output operand. */
b690cc0f
RG
9640 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9641 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
9642 }
9643 else
9644 {
9645 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9646 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9647 }
9648
9649 if (!optab1 || !optab2)
9650 return false;
9651
9652 vec_mode = TYPE_MODE (vectype);
947131ba
RS
9653 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9654 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9655 return false;
9656
4a00c761
JJ
9657 *code1 = c1;
9658 *code2 = c2;
9659
9660 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9661 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
9662 /* For scalar masks we may have different boolean
9663 vector types having the same QImode. Thus we
9664 add additional check for elements number. */
9665 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9666 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9667 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 9668
b8698a0f 9669 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 9670 types. */
ebfd146a 9671
4a00c761
JJ
9672 prev_type = vectype;
9673 prev_mode = vec_mode;
b8698a0f 9674
4a00c761
JJ
9675 if (!CONVERT_EXPR_CODE_P (code))
9676 return false;
b8698a0f 9677
4a00c761
JJ
9678 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9679 intermediate steps in promotion sequence. We try
9680 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9681 not. */
9771b263 9682 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9683 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9684 {
9685 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9686 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9687 {
928686b1
RS
9688 poly_uint64 intermediate_nelts
9689 = exact_div (TYPE_VECTOR_SUBPARTS (prev_type), 2);
3ae0661a 9690 intermediate_type
928686b1 9691 = build_truth_vector_type (intermediate_nelts,
3ae0661a
IE
9692 current_vector_size);
9693 if (intermediate_mode != TYPE_MODE (intermediate_type))
9694 return false;
9695 }
9696 else
9697 intermediate_type
9698 = lang_hooks.types.type_for_mode (intermediate_mode,
9699 TYPE_UNSIGNED (prev_type));
9700
4a00c761
JJ
9701 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9702 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9703
9704 if (!optab3 || !optab4
9705 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9706 || insn_data[icode1].operand[0].mode != intermediate_mode
9707 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9708 || insn_data[icode2].operand[0].mode != intermediate_mode
9709 || ((icode1 = optab_handler (optab3, intermediate_mode))
9710 == CODE_FOR_nothing)
9711 || ((icode2 = optab_handler (optab4, intermediate_mode))
9712 == CODE_FOR_nothing))
9713 break;
ebfd146a 9714
9771b263 9715 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9716 (*multi_step_cvt)++;
9717
9718 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9719 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 9720 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9721 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9722 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
9723
9724 prev_type = intermediate_type;
9725 prev_mode = intermediate_mode;
ebfd146a
IR
9726 }
9727
9771b263 9728 interm_types->release ();
4a00c761 9729 return false;
ebfd146a
IR
9730}
9731
9732
9733/* Function supportable_narrowing_operation
9734
b8698a0f
L
9735 Check whether an operation represented by the code CODE is a
9736 narrowing operation that is supported by the target platform in
b690cc0f
RG
9737 vector form (i.e., when operating on arguments of type VECTYPE_IN
9738 and producing a result of type VECTYPE_OUT).
b8698a0f 9739
ebfd146a 9740 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 9741 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
9742 the target platform directly via vector tree-codes.
9743
9744 Output:
b8698a0f
L
9745 - CODE1 is the code of a vector operation to be used when
9746 vectorizing the operation, if available.
ebfd146a
IR
9747 - MULTI_STEP_CVT determines the number of required intermediate steps in
9748 case of multi-step conversion (like int->short->char - in that case
9749 MULTI_STEP_CVT will be 1).
9750 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 9751 narrowing operation (short in the above example). */
ebfd146a
IR
9752
9753bool
9754supportable_narrowing_operation (enum tree_code code,
b690cc0f 9755 tree vectype_out, tree vectype_in,
ebfd146a 9756 enum tree_code *code1, int *multi_step_cvt,
9771b263 9757 vec<tree> *interm_types)
ebfd146a 9758{
ef4bddc2 9759 machine_mode vec_mode;
ebfd146a
IR
9760 enum insn_code icode1;
9761 optab optab1, interm_optab;
b690cc0f
RG
9762 tree vectype = vectype_in;
9763 tree narrow_vectype = vectype_out;
ebfd146a 9764 enum tree_code c1;
3ae0661a 9765 tree intermediate_type, prev_type;
ef4bddc2 9766 machine_mode intermediate_mode, prev_mode;
ebfd146a 9767 int i;
4a00c761 9768 bool uns;
ebfd146a 9769
4a00c761 9770 *multi_step_cvt = 0;
ebfd146a
IR
9771 switch (code)
9772 {
9773 CASE_CONVERT:
9774 c1 = VEC_PACK_TRUNC_EXPR;
9775 break;
9776
9777 case FIX_TRUNC_EXPR:
9778 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9779 break;
9780
9781 case FLOAT_EXPR:
9782 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9783 tree code and optabs used for computing the operation. */
9784 return false;
9785
9786 default:
9787 gcc_unreachable ();
9788 }
9789
9790 if (code == FIX_TRUNC_EXPR)
9791 /* The signedness is determined from output operand. */
b690cc0f 9792 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
9793 else
9794 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9795
9796 if (!optab1)
9797 return false;
9798
9799 vec_mode = TYPE_MODE (vectype);
947131ba 9800 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9801 return false;
9802
4a00c761
JJ
9803 *code1 = c1;
9804
9805 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
9806 /* For scalar masks we may have different boolean
9807 vector types having the same QImode. Thus we
9808 add additional check for elements number. */
9809 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9810 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
9811 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 9812
ebfd146a
IR
9813 /* Check if it's a multi-step conversion that can be done using intermediate
9814 types. */
4a00c761 9815 prev_mode = vec_mode;
3ae0661a 9816 prev_type = vectype;
4a00c761
JJ
9817 if (code == FIX_TRUNC_EXPR)
9818 uns = TYPE_UNSIGNED (vectype_out);
9819 else
9820 uns = TYPE_UNSIGNED (vectype);
9821
9822 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9823 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9824 costly than signed. */
9825 if (code == FIX_TRUNC_EXPR && uns)
9826 {
9827 enum insn_code icode2;
9828
9829 intermediate_type
9830 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9831 interm_optab
9832 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 9833 if (interm_optab != unknown_optab
4a00c761
JJ
9834 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9835 && insn_data[icode1].operand[0].mode
9836 == insn_data[icode2].operand[0].mode)
9837 {
9838 uns = false;
9839 optab1 = interm_optab;
9840 icode1 = icode2;
9841 }
9842 }
ebfd146a 9843
4a00c761
JJ
9844 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9845 intermediate steps in promotion sequence. We try
9846 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 9847 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9848 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9849 {
9850 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9851 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9852 {
9853 intermediate_type
9854 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9855 current_vector_size);
9856 if (intermediate_mode != TYPE_MODE (intermediate_type))
9857 return false;
9858 }
9859 else
9860 intermediate_type
9861 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
9862 interm_optab
9863 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9864 optab_default);
9865 if (!interm_optab
9866 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9867 || insn_data[icode1].operand[0].mode != intermediate_mode
9868 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9869 == CODE_FOR_nothing))
9870 break;
9871
9771b263 9872 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9873 (*multi_step_cvt)++;
9874
9875 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 9876 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9877 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
9878 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
9879
9880 prev_mode = intermediate_mode;
3ae0661a 9881 prev_type = intermediate_type;
4a00c761 9882 optab1 = interm_optab;
ebfd146a
IR
9883 }
9884
9771b263 9885 interm_types->release ();
4a00c761 9886 return false;
ebfd146a 9887}