/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
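  /* Loads and stores that are gathers or scatters are given the more
     specific gather/scatter cost kinds.  */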
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
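      /* For internal masked loads and stores, only the mask (and, for
	 stores, the stored value) count as non-indexing operands; the
	 address arguments are used only for indexing.  */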
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		     vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
	    vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

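	      /* Operand 0 is the LHS, so start scanning the uses at
		 operand 1.  For a COND_EXPR condition the embedded
		 comparison's operands need to be processed separately.  */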
	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

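      /* The offset operand of a gather or scatter needs a vector def of
	 its own, so process it explicitly; FORCE bypasses the
	 non-indexing-use check that would otherwise skip it.  */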
      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       enum vect_def_type dt, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    /* N scalar stores plus extracting the elements.  */
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     scalar_store, stmt_info, 0, vect_body);
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

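  /* For element-wise or strided-SLP stores we also need to extract each
     element from its containing vector before it can be stored.  */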
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
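  /* For element-wise or strided-SLP loads we also need the cost of
     constructing each vector from the individually loaded elements.  */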
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
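	  /* Basic-block vectorization: insert the init stmt at the start
	     of the block, after any labels.  */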
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}


ebfd146a
IR
1462/* Function vect_get_vec_def_for_stmt_copy
1463
ff802fa1 1464 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1467 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1469 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1470 DT is the type of the vector def VEC_OPRND.
1471
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1475 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1476 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1480 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1483
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1485
ebfd146a
IR
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1489 VS1.3: vx.3 = memref3
ebfd146a
IR
1490
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1495
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
b8698a0f
L
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1500 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1501 returns the vector-def 'vx.0'.
1502
b8698a0f
L
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1507
b8698a0f
L
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1517
1518tree
1519vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1520{
355fe088 1521 gimple *vec_stmt_for_operand;
ebfd146a
IR
1522 stmt_vec_info def_stmt_info;
1523
1524 /* Do nothing; can reuse same def. */
8644a673 1525 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1526 return vec_oprnd;
1527
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1538}
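
/* Standalone illustration of the situation described above, in plain
   GNU C rather than in terms of the vectorizer's internals: with VF = 16
   and nunits = 4 the scalar statement S2 becomes four vector copies per
   vectorized iteration, each consuming the def produced by the matching
   copy of the load S1.  The arrays are assumed suitably aligned and n a
   multiple of 16; all names below are made up.  */

typedef int v4si __attribute__ ((vector_size (16)));

void
vf16_nunits4_example (int *z, const int *x, int n)
{
  v4si one = { 1, 1, 1, 1 };
  for (int i = 0; i < n; i += 16)            /* one vectorized iteration */
    {
      v4si vx0 = *(const v4si *) (x + i);      /* VS1.0 */
      v4si vx1 = *(const v4si *) (x + i + 4);  /* VS1.1 */
      v4si vx2 = *(const v4si *) (x + i + 8);  /* VS1.2 */
      v4si vx3 = *(const v4si *) (x + i + 12); /* VS1.3 */
      *(v4si *) (z + i) = vx0 + one;           /* VSnew.0 uses vx.0 */
      *(v4si *) (z + i + 4) = vx1 + one;       /* VSnew.1 uses vx.1 */
      *(v4si *) (z + i + 8) = vx2 + one;       /* VSnew.2 uses vx.2 */
      *(v4si *) (z + i + 12) = vx3 + one;      /* VSnew.3 uses vx.3 */
    }
}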
1539
1540
1541/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1543
c78e3652 1544void
b8698a0f 1545vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
ebfd146a 1548{
9771b263 1549 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1550
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1552 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1553
9771b263 1554 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1555 {
9771b263 1556 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1558 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1559 }
1560}
1561
1562
c78e3652 1563/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1564
c78e3652 1565void
355fe088 1566vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
306b0c92 1569 slp_tree slp_node)
ebfd146a
IR
1570{
1571 if (slp_node)
d092494c
IR
1572 {
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1576
9771b263 1577 ops.quick_push (op0);
d092494c 1578 if (op1)
9771b263 1579 ops.quick_push (op1);
d092494c 1580
306b0c92 1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1582
37b5ec8f 1583 *vec_oprnds0 = vec_defs[0];
d092494c 1584 if (op1)
37b5ec8f 1585 *vec_oprnds1 = vec_defs[1];
d092494c 1586 }
ebfd146a
IR
1587 else
1588 {
1589 tree vec_oprnd;
1590
9771b263 1591 vec_oprnds0->create (1);
81c40241 1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1593 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1594
1595 if (op1)
1596 {
9771b263 1597 vec_oprnds1->create (1);
81c40241 1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1599 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1600 }
1601 }
1602}
1603
1604
1605/* Function vect_finish_stmt_generation.
1606
1607 Insert a new stmt. */
1608
1609void
355fe088 1610vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1611 gimple_stmt_iterator *gsi)
1612{
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
310213d4 1614 vec_info *vinfo = stmt_info->vinfo;
ebfd146a
IR
1615
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1617
54e8e2c3
RG
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1620 {
355fe088 1621 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1624 {
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1637 {
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1641 }
1642 }
1643 }
ebfd146a
IR
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1645
310213d4 1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
ebfd146a 1647
73fbfcad 1648 if (dump_enabled_p ())
ebfd146a 1649 {
78c60e3d
SS
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1652 }
1653
ad885386 1654 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1655
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1662}
1663
70439f0d
RS
1664/* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1668
70439f0d
RS
1669static internal_fn
1670vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
ebfd146a 1672{
70439f0d
RS
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1679 {
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1682 {
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1687 return ifn;
1688 }
1689 }
1690 return IFN_LAST;
ebfd146a
IR
1691}
1692
5ce9450f 1693
355fe088 1694static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1695 gimple_stmt_iterator *);
1696
62da9e14
RS
1697/* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1700
1701static int
1702compare_step_with_zero (gimple *stmt)
1703{
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
62da9e14
RS
1708}
1709
1710/* If the target supports a permute mask that reverses the elements in
 1711 a vector of type VECTYPE, return that mask; otherwise return null. */
1712
1713static tree
1714perm_mask_for_reverse (tree vectype)
1715{
1716 int i, nunits;
62da9e14
RS
1717
1718 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 1719
d980067b
RS
1720 /* The encoding has a single stepped pattern. */
1721 vec_perm_builder sel (nunits, 1, 3);
1722 for (i = 0; i < 3; ++i)
908a1a16 1723 sel.quick_push (nunits - 1 - i);
62da9e14 1724
e3342de4
RS
1725 vec_perm_indices indices (sel, 1, nunits);
1726 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 1727 return NULL_TREE;
e3342de4 1728 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 1729}
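
/* For a four-element vector the selector built above is { 3, 2, 1, 0 }.
   A minimal standalone sketch of the same reversal using GNU C's generic
   vector extension; the names here are made up and this is not how the
   vectorizer itself emits the permute:  */

typedef int v4si __attribute__ ((vector_size (16)));

static inline v4si
reverse_v4si_example (v4si x)
{
  v4si sel = { 3, 2, 1, 0 };
  return __builtin_shuffle (x, sel);   /* { x[3], x[2], x[1], x[0] } */
}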
5ce9450f 1730
2de001ee
RS
1731/* A subroutine of get_load_store_type, with a subset of the same
1732 arguments. Handle the case where STMT is part of a grouped load
1733 or store.
1734
1735 For stores, the statements in the group are all consecutive
1736 and there is no gap at the end. For loads, the statements in the
1737 group might not be consecutive; there can be gaps between statements
1738 as well as at the end. */
1739
1740static bool
1741get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1742 vec_load_store_type vls_type,
1743 vect_memory_access_type *memory_access_type)
1744{
1745 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1746 vec_info *vinfo = stmt_info->vinfo;
1747 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1748 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1749 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 1750 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
1751 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1752 bool single_element_p = (stmt == first_stmt
1753 && !GROUP_NEXT_ELEMENT (stmt_info));
1754 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
522fcdd7 1755 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1756
1757 /* True if the vectorized statements would access beyond the last
1758 statement in the group. */
1759 bool overrun_p = false;
1760
1761 /* True if we can cope with such overrun by peeling for gaps, so that
1762 there is at least one final scalar iteration after the vector loop. */
1763 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1764
1765 /* There can only be a gap at the end of the group if the stride is
1766 known at compile time. */
1767 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1768
1769 /* Stores can't yet have gaps. */
1770 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1771
1772 if (slp)
1773 {
1774 if (STMT_VINFO_STRIDED_P (stmt_info))
1775 {
1776 /* Try to use consecutive accesses of GROUP_SIZE elements,
1777 separated by the stride, until we have a complete vector.
1778 Fall back to scalar accesses if that isn't possible. */
1779 if (nunits % group_size == 0)
1780 *memory_access_type = VMAT_STRIDED_SLP;
1781 else
1782 *memory_access_type = VMAT_ELEMENTWISE;
1783 }
1784 else
1785 {
1786 overrun_p = loop_vinfo && gap != 0;
1787 if (overrun_p && vls_type != VLS_LOAD)
1788 {
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1790 "Grouped store with gaps requires"
1791 " non-consecutive accesses\n");
1792 return false;
1793 }
f702e7d4
RS
1794 /* An overrun is fine if the trailing elements are smaller
1795 than the alignment boundary B. Every vector access will
1796 be a multiple of B and so we are guaranteed to access a
1797 non-gap element in the same B-sized block. */
f9ef2c76 1798 if (overrun_p
f702e7d4
RS
1799 && gap < (vect_known_alignment_in_bytes (first_dr)
1800 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1801 overrun_p = false;
2de001ee
RS
1802 if (overrun_p && !can_overrun_p)
1803 {
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "Peeling for outer loop is not supported\n");
1807 return false;
1808 }
1809 *memory_access_type = VMAT_CONTIGUOUS;
1810 }
1811 }
1812 else
1813 {
1814 /* We can always handle this case using elementwise accesses,
1815 but see if something more efficient is available. */
1816 *memory_access_type = VMAT_ELEMENTWISE;
1817
1818 /* If there is a gap at the end of the group then these optimizations
1819 would access excess elements in the last iteration. */
1820 bool would_overrun_p = (gap != 0);
f702e7d4
RS
1821 /* An overrun is fine if the trailing elements are smaller than the
1822 alignment boundary B. Every vector access will be a multiple of B
1823 and so we are guaranteed to access a non-gap element in the
1824 same B-sized block. */
f9ef2c76 1825 if (would_overrun_p
f702e7d4
RS
1826 && gap < (vect_known_alignment_in_bytes (first_dr)
1827 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1828 would_overrun_p = false;
f702e7d4 1829
2de001ee 1830 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
1831 && (can_overrun_p || !would_overrun_p)
1832 && compare_step_with_zero (stmt) > 0)
2de001ee
RS
1833 {
1834 /* First try using LOAD/STORE_LANES. */
1835 if (vls_type == VLS_LOAD
1836 ? vect_load_lanes_supported (vectype, group_size)
1837 : vect_store_lanes_supported (vectype, group_size))
1838 {
1839 *memory_access_type = VMAT_LOAD_STORE_LANES;
1840 overrun_p = would_overrun_p;
1841 }
1842
1843 /* If that fails, try using permuting loads. */
1844 if (*memory_access_type == VMAT_ELEMENTWISE
1845 && (vls_type == VLS_LOAD
1846 ? vect_grouped_load_supported (vectype, single_element_p,
1847 group_size)
1848 : vect_grouped_store_supported (vectype, group_size)))
1849 {
1850 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1851 overrun_p = would_overrun_p;
1852 }
1853 }
1854 }
1855
1856 if (vls_type != VLS_LOAD && first_stmt == stmt)
1857 {
1858 /* STMT is the leader of the group. Check the operands of all the
1859 stmts of the group. */
1860 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1861 while (next_stmt)
1862 {
1863 gcc_assert (gimple_assign_single_p (next_stmt));
1864 tree op = gimple_assign_rhs1 (next_stmt);
1865 gimple *def_stmt;
1866 enum vect_def_type dt;
1867 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1868 {
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 "use not simple.\n");
1872 return false;
1873 }
1874 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1875 }
1876 }
1877
1878 if (overrun_p)
1879 {
1880 gcc_assert (can_overrun_p);
1881 if (dump_enabled_p ())
1882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1883 "Data access with gaps requires scalar "
1884 "epilogue loop\n");
1885 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1886 }
1887
1888 return true;
1889}
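
/* Worked example of the overrun rule used twice above, with made-up
   numbers: a load group that uses three ints out of every four
   (GROUP_SIZE 4, gap 1 at the end), where the first access is known to
   be 16-byte aligned.  Each 16-byte block holds 16 / 4 = 4 elements, and
   since the gap (1) is smaller than that, a vector access that runs into
   the gap still stays inside an aligned block containing a real group
   element, so the overrun cannot fault.  A minimal sketch of the test
   itself (helper name hypothetical):  */

static inline int
group_overrun_is_safe_example (unsigned int gap, unsigned int align_bytes,
			       unsigned int scalar_bytes)
{
  return gap < align_bytes / scalar_bytes;
}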
1890
62da9e14
RS
1891/* A subroutine of get_load_store_type, with a subset of the same
1892 arguments. Handle the case where STMT is a load or store that
1893 accesses consecutive elements with a negative step. */
1894
1895static vect_memory_access_type
1896get_negative_load_store_type (gimple *stmt, tree vectype,
1897 vec_load_store_type vls_type,
1898 unsigned int ncopies)
1899{
1900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1901 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1902 dr_alignment_support alignment_support_scheme;
1903
1904 if (ncopies > 1)
1905 {
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "multiple types with negative step.\n");
1909 return VMAT_ELEMENTWISE;
1910 }
1911
1912 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1913 if (alignment_support_scheme != dr_aligned
1914 && alignment_support_scheme != dr_unaligned_supported)
1915 {
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918 "negative step but alignment required.\n");
1919 return VMAT_ELEMENTWISE;
1920 }
1921
1922 if (vls_type == VLS_STORE_INVARIANT)
1923 {
1924 if (dump_enabled_p ())
1925 dump_printf_loc (MSG_NOTE, vect_location,
1926 "negative step with invariant source;"
1927 " no permute needed.\n");
1928 return VMAT_CONTIGUOUS_DOWN;
1929 }
1930
1931 if (!perm_mask_for_reverse (vectype))
1932 {
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "negative step and reversing not supported.\n");
1936 return VMAT_ELEMENTWISE;
1937 }
1938
1939 return VMAT_CONTIGUOUS_REVERSE;
1940}
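
/* Standalone sketch of VMAT_CONTIGUOUS_REVERSE: a negative-step access
   such as "... = a[n - 1 - i]" can be implemented as a forward
   contiguous load of the block that ends at the current element,
   followed by a lane reversal using the mask from perm_mask_for_reverse.
   Plain GNU C version, assuming n is a multiple of 4 and suitably
   aligned arrays (all names made up):  */

typedef int v4si __attribute__ ((vector_size (16)));

void
copy_reversed_example (int *dst, const int *src, int n)
{
  v4si sel = { 3, 2, 1, 0 };
  for (int i = 0; i < n; i += 4)
    {
      /* src[n-1-i] down to src[n-4-i] are contiguous at &src[n-4-i].  */
      v4si v = *(const v4si *) (src + n - 4 - i);
      *(v4si *) (dst + i) = __builtin_shuffle (v, sel);
    }
}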
1941
2de001ee
RS
1942/* Analyze load or store statement STMT of type VLS_TYPE. Return true
1943 if there is a memory access type that the vectorized form can use,
1944 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1945 or scatters, fill in GS_INFO accordingly.
1946
1947 SLP says whether we're performing SLP rather than loop vectorization.
62da9e14
RS
1948 VECTYPE is the vector type that the vectorized statements will use.
1949 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
1950
1951static bool
1952get_load_store_type (gimple *stmt, tree vectype, bool slp,
62da9e14 1953 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
1954 vect_memory_access_type *memory_access_type,
1955 gather_scatter_info *gs_info)
1956{
1957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1958 vec_info *vinfo = stmt_info->vinfo;
1959 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1961 {
1962 *memory_access_type = VMAT_GATHER_SCATTER;
1963 gimple *def_stmt;
1964 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1965 gcc_unreachable ();
1966 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1967 &gs_info->offset_dt,
1968 &gs_info->offset_vectype))
1969 {
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "%s index use not simple.\n",
1973 vls_type == VLS_LOAD ? "gather" : "scatter");
1974 return false;
1975 }
1976 }
1977 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1978 {
1979 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1980 memory_access_type))
1981 return false;
1982 }
1983 else if (STMT_VINFO_STRIDED_P (stmt_info))
1984 {
1985 gcc_assert (!slp);
1986 *memory_access_type = VMAT_ELEMENTWISE;
1987 }
1988 else
62da9e14
RS
1989 {
1990 int cmp = compare_step_with_zero (stmt);
1991 if (cmp < 0)
1992 *memory_access_type = get_negative_load_store_type
1993 (stmt, vectype, vls_type, ncopies);
1994 else if (cmp == 0)
1995 {
1996 gcc_assert (vls_type == VLS_LOAD);
1997 *memory_access_type = VMAT_INVARIANT;
1998 }
1999 else
2000 *memory_access_type = VMAT_CONTIGUOUS;
2001 }
2de001ee
RS
2002
2003 /* FIXME: At the moment the cost model seems to underestimate the
2004 cost of using elementwise accesses. This check preserves the
2005 traditional behavior until that can be fixed. */
2006 if (*memory_access_type == VMAT_ELEMENTWISE
2007 && !STMT_VINFO_STRIDED_P (stmt_info))
2008 {
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "not falling back to elementwise accesses\n");
2012 return false;
2013 }
2014 return true;
2015}
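
/* Rough scalar-loop illustrations of the access types chosen above.
   The exact classification also depends on target capabilities and on
   what the earlier data-reference analysis recorded, so these are only
   indicative (standalone sketch, all names made up):  */

void
access_type_examples (int *a, const int *b, const int *idx, int stride,
		      int n)
{
  for (int i = 0; i < n; i++)	/* step > 0: VMAT_CONTIGUOUS */
    a[i] = b[i];
  for (int i = 0; i < n; i++)	/* step < 0: VMAT_CONTIGUOUS_REVERSE,
				   or VMAT_ELEMENTWISE without a
				   reverse permute */
    a[i] = b[n - 1 - i];
  for (int i = 0; i < n; i++)	/* step == 0 load: VMAT_INVARIANT */
    a[i] = b[0];
  for (int i = 0; i < n; i++)	/* runtime stride: VMAT_ELEMENTWISE */
    a[i] = b[i * stride];
  for (int i = 0; i < n; i++)	/* indirect: VMAT_GATHER_SCATTER, given
				   target gather support */
    a[i] = b[idx[i]];
}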
2016
5ce9450f
JJ
2017/* Function vectorizable_mask_load_store.
2018
2019 Check if STMT performs a conditional load or store that can be vectorized.
2020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2023
2024static bool
355fe088
TS
2025vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2026 gimple **vec_stmt, slp_tree slp_node)
5ce9450f
JJ
2027{
2028 tree vec_dest = NULL;
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 stmt_vec_info prev_stmt_info;
2031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2032 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2033 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2034 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
57e2f6ad 2036 tree rhs_vectype = NULL_TREE;
045c1278 2037 tree mask_vectype;
5ce9450f 2038 tree elem_type;
355fe088 2039 gimple *new_stmt;
5ce9450f
JJ
2040 tree dummy;
2041 tree dataref_ptr = NULL_TREE;
355fe088 2042 gimple *ptr_incr;
5ce9450f
JJ
2043 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2044 int ncopies;
2045 int i, j;
2046 bool inv_p;
134c85ca 2047 gather_scatter_info gs_info;
2de001ee 2048 vec_load_store_type vls_type;
5ce9450f 2049 tree mask;
355fe088 2050 gimple *def_stmt;
5ce9450f
JJ
2051 enum vect_def_type dt;
2052
2053 if (slp_node != NULL)
2054 return false;
2055
e8f142e2 2056 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5ce9450f
JJ
2057 gcc_assert (ncopies >= 1);
2058
5ce9450f 2059 mask = gimple_call_arg (stmt, 2);
045c1278 2060
2568d8a1 2061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
5ce9450f
JJ
2062 return false;
2063
2064 /* FORNOW. This restriction should be relaxed. */
2065 if (nested_in_vect_loop && ncopies > 1)
2066 {
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "multiple types in nested loop.");
2070 return false;
2071 }
2072
2073 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2074 return false;
2075
66c16fd9
RB
2076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2077 && ! vec_stmt)
5ce9450f
JJ
2078 return false;
2079
2080 if (!STMT_VINFO_DATA_REF (stmt_info))
2081 return false;
2082
2083 elem_type = TREE_TYPE (vectype);
2084
045c1278
IE
2085 if (TREE_CODE (mask) != SSA_NAME)
2086 return false;
2087
2088 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2089 return false;
2090
2091 if (!mask_vectype)
2092 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2093
dc6a3147
IE
2094 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2095 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
045c1278
IE
2096 return false;
2097
2de001ee 2098 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
57e2f6ad
IE
2099 {
2100 tree rhs = gimple_call_arg (stmt, 3);
2101 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2102 return false;
2de001ee
RS
2103 if (dt == vect_constant_def || dt == vect_external_def)
2104 vls_type = VLS_STORE_INVARIANT;
2105 else
2106 vls_type = VLS_STORE;
57e2f6ad 2107 }
2de001ee
RS
2108 else
2109 vls_type = VLS_LOAD;
57e2f6ad 2110
2de001ee 2111 vect_memory_access_type memory_access_type;
62da9e14 2112 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2de001ee
RS
2113 &memory_access_type, &gs_info))
2114 return false;
03b9e8e4 2115
2de001ee
RS
2116 if (memory_access_type == VMAT_GATHER_SCATTER)
2117 {
134c85ca 2118 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
03b9e8e4
JJ
2119 tree masktype
2120 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2121 if (TREE_CODE (masktype) == INTEGER_TYPE)
2122 {
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2125 "masked gather with integer mask not supported.");
2126 return false;
2127 }
5ce9450f 2128 }
2de001ee
RS
2129 else if (memory_access_type != VMAT_CONTIGUOUS)
2130 {
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "unsupported access type for masked %s.\n",
2134 vls_type == VLS_LOAD ? "load" : "store");
2135 return false;
2136 }
5ce9450f 2137 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
045c1278
IE
2138 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2139 TYPE_MODE (mask_vectype),
2de001ee 2140 vls_type == VLS_LOAD)
57e2f6ad
IE
2141 || (rhs_vectype
2142 && !useless_type_conversion_p (vectype, rhs_vectype)))
5ce9450f
JJ
2143 return false;
2144
5ce9450f
JJ
2145 if (!vec_stmt) /* transformation not required. */
2146 {
2de001ee 2147 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5ce9450f 2148 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2de001ee
RS
2149 if (vls_type == VLS_LOAD)
2150 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2151 NULL, NULL, NULL);
5ce9450f 2152 else
2de001ee
RS
2153 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2154 dt, NULL, NULL, NULL);
5ce9450f
JJ
2155 return true;
2156 }
2de001ee 2157 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5ce9450f 2158
67b8dbac 2159 /* Transform. */
5ce9450f 2160
2de001ee 2161 if (memory_access_type == VMAT_GATHER_SCATTER)
5ce9450f
JJ
2162 {
2163 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 2164 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5ce9450f 2165 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
acdcd61b 2166 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
5ce9450f 2167 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
acdcd61b 2168 tree mask_perm_mask = NULL_TREE;
5ce9450f
JJ
2169 edge pe = loop_preheader_edge (loop);
2170 gimple_seq seq;
2171 basic_block new_bb;
2172 enum { NARROW, NONE, WIDEN } modifier;
134c85ca 2173 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5ce9450f 2174
134c85ca 2175 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
acdcd61b
JJ
2176 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2177 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2178 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2179 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2180 scaletype = TREE_VALUE (arglist);
2181 gcc_checking_assert (types_compatible_p (srctype, rettype)
2182 && types_compatible_p (srctype, masktype));
2183
5ce9450f
JJ
2184 if (nunits == gather_off_nunits)
2185 modifier = NONE;
2186 else if (nunits == gather_off_nunits / 2)
2187 {
5ce9450f
JJ
2188 modifier = WIDEN;
2189
e3342de4 2190 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
5ce9450f 2191 for (i = 0; i < gather_off_nunits; ++i)
908a1a16 2192 sel.quick_push (i | nunits);
5ce9450f 2193
e3342de4
RS
2194 vec_perm_indices indices (sel, 1, gather_off_nunits);
2195 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
2196 indices);
5ce9450f
JJ
2197 }
2198 else if (nunits == gather_off_nunits * 2)
2199 {
5ce9450f
JJ
2200 modifier = NARROW;
2201
e3342de4 2202 vec_perm_builder sel (nunits, nunits, 1);
908a1a16 2203 sel.quick_grow (nunits);
5ce9450f
JJ
2204 for (i = 0; i < nunits; ++i)
2205 sel[i] = i < gather_off_nunits
2206 ? i : i + nunits - gather_off_nunits;
e3342de4
RS
2207 vec_perm_indices indices (sel, 2, nunits);
2208 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5ce9450f 2209
5ce9450f 2210 ncopies *= 2;
e3342de4 2211
acdcd61b
JJ
2212 for (i = 0; i < nunits; ++i)
2213 sel[i] = i | gather_off_nunits;
e3342de4
RS
2214 indices.new_vector (sel, 2, gather_off_nunits);
2215 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
5ce9450f
JJ
2216 }
2217 else
2218 gcc_unreachable ();
2219
5ce9450f
JJ
2220 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2221
134c85ca 2222 ptr = fold_convert (ptrtype, gs_info.base);
5ce9450f
JJ
2223 if (!is_gimple_min_invariant (ptr))
2224 {
2225 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2226 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2227 gcc_assert (!new_bb);
2228 }
2229
134c85ca 2230 scale = build_int_cst (scaletype, gs_info.scale);
5ce9450f
JJ
2231
2232 prev_stmt_info = NULL;
2233 for (j = 0; j < ncopies; ++j)
2234 {
2235 if (modifier == WIDEN && (j & 1))
2236 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2237 perm_mask, stmt, gsi);
2238 else if (j == 0)
2239 op = vec_oprnd0
134c85ca 2240 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5ce9450f
JJ
2241 else
2242 op = vec_oprnd0
134c85ca 2243 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
5ce9450f
JJ
2244
2245 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2246 {
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2248 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 2249 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5ce9450f
JJ
2250 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2251 new_stmt
0d0e4a03 2252 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2253 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2254 op = var;
2255 }
2256
acdcd61b
JJ
2257 if (mask_perm_mask && (j & 1))
2258 mask_op = permute_vec_elements (mask_op, mask_op,
2259 mask_perm_mask, stmt, gsi);
5ce9450f
JJ
2260 else
2261 {
acdcd61b 2262 if (j == 0)
81c40241 2263 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
acdcd61b
JJ
2264 else
2265 {
81c40241 2266 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
acdcd61b
JJ
2267 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2268 }
5ce9450f 2269
acdcd61b
JJ
2270 mask_op = vec_mask;
2271 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2272 {
2273 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2274 == TYPE_VECTOR_SUBPARTS (masktype));
0e22bb5a 2275 var = vect_get_new_ssa_name (masktype, vect_simple_var);
acdcd61b
JJ
2276 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2277 new_stmt
0d0e4a03 2278 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
acdcd61b
JJ
2279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2280 mask_op = var;
2281 }
5ce9450f
JJ
2282 }
2283
2284 new_stmt
134c85ca 2285 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
5ce9450f
JJ
2286 scale);
2287
2288 if (!useless_type_conversion_p (vectype, rettype))
2289 {
2290 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2291 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 2292 op = vect_get_new_ssa_name (rettype, vect_simple_var);
5ce9450f
JJ
2293 gimple_call_set_lhs (new_stmt, op);
2294 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 2295 var = make_ssa_name (vec_dest);
5ce9450f 2296 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
0d0e4a03 2297 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5ce9450f
JJ
2298 }
2299 else
2300 {
2301 var = make_ssa_name (vec_dest, new_stmt);
2302 gimple_call_set_lhs (new_stmt, var);
2303 }
2304
2305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2306
2307 if (modifier == NARROW)
2308 {
2309 if ((j & 1) == 0)
2310 {
2311 prev_res = var;
2312 continue;
2313 }
2314 var = permute_vec_elements (prev_res, var,
2315 perm_mask, stmt, gsi);
2316 new_stmt = SSA_NAME_DEF_STMT (var);
2317 }
2318
2319 if (prev_stmt_info == NULL)
2320 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2321 else
2322 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2323 prev_stmt_info = vinfo_for_stmt (new_stmt);
2324 }
3efe2e2c
JJ
2325
2326 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2327 from the IL. */
e6f5c25d
IE
2328 if (STMT_VINFO_RELATED_STMT (stmt_info))
2329 {
2330 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2331 stmt_info = vinfo_for_stmt (stmt);
2332 }
3efe2e2c
JJ
2333 tree lhs = gimple_call_lhs (stmt);
2334 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2335 set_vinfo_for_stmt (new_stmt, stmt_info);
2336 set_vinfo_for_stmt (stmt, NULL);
2337 STMT_VINFO_STMT (stmt_info) = new_stmt;
2338 gsi_replace (gsi, new_stmt, true);
5ce9450f
JJ
2339 return true;
2340 }
2de001ee 2341 else if (vls_type != VLS_LOAD)
5ce9450f
JJ
2342 {
2343 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2344 prev_stmt_info = NULL;
2d4dc223 2345 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
5ce9450f
JJ
2346 for (i = 0; i < ncopies; i++)
2347 {
2348 unsigned align, misalign;
2349
2350 if (i == 0)
2351 {
2352 tree rhs = gimple_call_arg (stmt, 3);
81c40241 2353 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
7251b0bf
RS
2354 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2355 mask_vectype);
5ce9450f
JJ
 2356 /* We should have caught mismatched types earlier. */
2357 gcc_assert (useless_type_conversion_p (vectype,
2358 TREE_TYPE (vec_rhs)));
2359 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2360 NULL_TREE, &dummy, gsi,
2361 &ptr_incr, false, &inv_p);
2362 gcc_assert (!inv_p);
2363 }
2364 else
2365 {
81c40241 2366 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
5ce9450f 2367 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
81c40241 2368 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2369 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2370 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2371 TYPE_SIZE_UNIT (vectype));
2372 }
2373
f702e7d4 2374 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2375 if (aligned_access_p (dr))
2376 misalign = 0;
2377 else if (DR_MISALIGNMENT (dr) == -1)
2378 {
2379 align = TYPE_ALIGN_UNIT (elem_type);
2380 misalign = 0;
2381 }
2382 else
2383 misalign = DR_MISALIGNMENT (dr);
2384 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2385 misalign);
08554c26 2386 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2387 misalign ? least_bit_hwi (misalign) : align);
a844293d 2388 gcall *call
5ce9450f 2389 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
08554c26 2390 ptr, vec_mask, vec_rhs);
a844293d
RS
2391 gimple_call_set_nothrow (call, true);
2392 new_stmt = call;
5ce9450f
JJ
2393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2394 if (i == 0)
2395 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2396 else
2397 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2398 prev_stmt_info = vinfo_for_stmt (new_stmt);
2399 }
2400 }
2401 else
2402 {
2403 tree vec_mask = NULL_TREE;
2404 prev_stmt_info = NULL;
2405 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2406 for (i = 0; i < ncopies; i++)
2407 {
2408 unsigned align, misalign;
2409
2410 if (i == 0)
2411 {
7251b0bf
RS
2412 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
2413 mask_vectype);
5ce9450f
JJ
2414 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2415 NULL_TREE, &dummy, gsi,
2416 &ptr_incr, false, &inv_p);
2417 gcc_assert (!inv_p);
2418 }
2419 else
2420 {
81c40241 2421 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
5ce9450f
JJ
2422 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2423 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2424 TYPE_SIZE_UNIT (vectype));
2425 }
2426
f702e7d4 2427 align = DR_TARGET_ALIGNMENT (dr);
5ce9450f
JJ
2428 if (aligned_access_p (dr))
2429 misalign = 0;
2430 else if (DR_MISALIGNMENT (dr) == -1)
2431 {
2432 align = TYPE_ALIGN_UNIT (elem_type);
2433 misalign = 0;
2434 }
2435 else
2436 misalign = DR_MISALIGNMENT (dr);
2437 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2438 misalign);
08554c26 2439 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
146ec50f 2440 misalign ? least_bit_hwi (misalign) : align);
a844293d 2441 gcall *call
5ce9450f 2442 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
08554c26 2443 ptr, vec_mask);
a844293d
RS
2444 gimple_call_set_lhs (call, make_ssa_name (vec_dest));
2445 gimple_call_set_nothrow (call, true);
2446 vect_finish_stmt_generation (stmt, call, gsi);
5ce9450f 2447 if (i == 0)
a844293d 2448 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
5ce9450f 2449 else
a844293d
RS
2450 STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
2451 prev_stmt_info = vinfo_for_stmt (call);
5ce9450f
JJ
2452 }
2453 }
2454
2de001ee 2455 if (vls_type == VLS_LOAD)
3efe2e2c
JJ
2456 {
2457 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2458 from the IL. */
e6f5c25d
IE
2459 if (STMT_VINFO_RELATED_STMT (stmt_info))
2460 {
2461 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2462 stmt_info = vinfo_for_stmt (stmt);
2463 }
3efe2e2c
JJ
2464 tree lhs = gimple_call_lhs (stmt);
2465 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2466 set_vinfo_for_stmt (new_stmt, stmt_info);
2467 set_vinfo_for_stmt (stmt, NULL);
2468 STMT_VINFO_STMT (stmt_info) = new_stmt;
2469 gsi_replace (gsi, new_stmt, true);
2470 }
2471
5ce9450f
JJ
2472 return true;
2473}
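
/* The kind of scalar loop whose if-converted form contains the
   IFN_MASK_LOAD / IFN_MASK_STORE calls handled above.  Whether it is
   vectorized this way depends on target support for masked accesses
   (can_vec_mask_load_store_p); the function below is only an
   illustrative standalone sketch:  */

void
cond_copy_example (int *dst, const int *src, const int *cond, int n)
{
  for (int i = 0; i < n; i++)
    if (cond[i])
      /* Both the load of src[i] and the store to dst[i] become masked
	 vector accesses, using the vectorized cond[i] != 0 comparison
	 as the mask.  */
      dst[i] = src[i];
}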
2474
37b14185
RB
2475/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2476
2477static bool
2478vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2479 gimple **vec_stmt, slp_tree slp_node,
2480 tree vectype_in, enum vect_def_type *dt)
2481{
2482 tree op, vectype;
2483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2485 unsigned ncopies, nunits;
2486
2487 op = gimple_call_arg (stmt, 0);
2488 vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2490
2491 /* Multiple types in SLP are handled by creating the appropriate number of
2492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2493 case of SLP. */
2494 if (slp_node)
2495 ncopies = 1;
2496 else
e8f142e2 2497 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2498
2499 gcc_assert (ncopies >= 1);
2500
2501 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2502 if (! char_vectype)
2503 return false;
2504
794e3180 2505 unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
794e3180 2506 unsigned word_bytes = num_bytes / nunits;
908a1a16 2507
d980067b
RS
2508 /* The encoding uses one stepped pattern for each byte in the word. */
2509 vec_perm_builder elts (num_bytes, word_bytes, 3);
2510 for (unsigned i = 0; i < 3; ++i)
37b14185 2511 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2512 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2513
e3342de4
RS
2514 vec_perm_indices indices (elts, 1, num_bytes);
2515 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2516 return false;
2517
2518 if (! vec_stmt)
2519 {
2520 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2523 "\n");
2524 if (! PURE_SLP_STMT (stmt_info))
2525 {
2526 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2527 1, vector_stmt, stmt_info, 0, vect_prologue);
2528 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2529 ncopies, vec_perm, stmt_info, 0, vect_body);
2530 }
2531 return true;
2532 }
2533
736d0f28 2534 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
2535
2536 /* Transform. */
2537 vec<tree> vec_oprnds = vNULL;
2538 gimple *new_stmt = NULL;
2539 stmt_vec_info prev_stmt_info = NULL;
2540 for (unsigned j = 0; j < ncopies; j++)
2541 {
2542 /* Handle uses. */
2543 if (j == 0)
306b0c92 2544 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2545 else
2546 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2547
 2548 /* Arguments are ready. Create the new vector stmt. */
2549 unsigned i;
2550 tree vop;
2551 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2552 {
2553 tree tem = make_ssa_name (char_vectype);
2554 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2555 char_vectype, vop));
2556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2557 tree tem2 = make_ssa_name (char_vectype);
2558 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2559 tem, tem, bswap_vconst);
2560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2561 tem = make_ssa_name (vectype);
2562 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2563 vectype, tem2));
2564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2565 if (slp_node)
2566 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2567 }
2568
2569 if (slp_node)
2570 continue;
2571
2572 if (j == 0)
2573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2574 else
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2576
2577 prev_stmt_info = vinfo_for_stmt (new_stmt);
2578 }
2579
2580 vec_oprnds.release ();
2581 return true;
2582}
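
/* For a vector of four 32-bit elements the byte selector built above
   expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   i.e. the bytes of each word are reversed in place.  Minimal standalone
   sketch of the same idea in GNU C (names made up):  */

typedef unsigned int v4usi __attribute__ ((vector_size (16)));
typedef unsigned char v16qu __attribute__ ((vector_size (16)));

static inline v4usi
bswap32x4_example (v4usi x)
{
  v16qu bytes = (v16qu) x;
  v16qu sel = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
  return (v4usi) __builtin_shuffle (bytes, sel);
}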
2583
b1b6836e
RS
2584/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2585 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2586 in a single step. On success, store the binary pack code in
2587 *CONVERT_CODE. */
2588
2589static bool
2590simple_integer_narrowing (tree vectype_out, tree vectype_in,
2591 tree_code *convert_code)
2592{
2593 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2594 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2595 return false;
2596
2597 tree_code code;
2598 int multi_step_cvt = 0;
2599 auto_vec <tree, 8> interm_types;
2600 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2601 &code, &multi_step_cvt,
2602 &interm_types)
2603 || multi_step_cvt)
2604 return false;
2605
2606 *convert_code = code;
2607 return true;
2608}
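
/* Scalar reference for the single-step narrowing recognized above
   (essentially the semantics of a binary pack such as
   VEC_PACK_TRUNC_EXPR): two inputs of N wide elements produce one
   output of 2*N truncated elements.  Standalone sketch on arrays rather
   than vectors (names made up):  */

void
pack_trunc_example (short *out, const int *lo, const int *hi, int n)
{
  for (int i = 0; i < n; i++)
    {
      out[i] = (short) lo[i];		/* first input -> low half */
      out[n + i] = (short) hi[i];	/* second input -> high half */
    }
}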
5ce9450f 2609
ebfd146a
IR
2610/* Function vectorizable_call.
2611
538dd0b7 2612 Check if GS performs a function call that can be vectorized.
b8698a0f 2613 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 2614 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2615 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2616
2617static bool
355fe088 2618vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2619 slp_tree slp_node)
ebfd146a 2620{
538dd0b7 2621 gcall *stmt;
ebfd146a
IR
2622 tree vec_dest;
2623 tree scalar_dest;
2624 tree op, type;
2625 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2626 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a
IR
2627 tree vectype_out, vectype_in;
2628 int nunits_in;
2629 int nunits_out;
2630 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 2632 vec_info *vinfo = stmt_info->vinfo;
81c40241 2633 tree fndecl, new_temp, rhs_type;
355fe088 2634 gimple *def_stmt;
0502fb85
UB
2635 enum vect_def_type dt[3]
2636 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 2637 int ndts = 3;
355fe088 2638 gimple *new_stmt = NULL;
ebfd146a 2639 int ncopies, j;
6e1aa848 2640 vec<tree> vargs = vNULL;
ebfd146a
IR
2641 enum { NARROW, NONE, WIDEN } modifier;
2642 size_t i, nargs;
9d5e7640 2643 tree lhs;
ebfd146a 2644
190c2236 2645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2646 return false;
2647
66c16fd9
RB
2648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2649 && ! vec_stmt)
ebfd146a
IR
2650 return false;
2651
538dd0b7
DM
2652 /* Is GS a vectorizable call? */
2653 stmt = dyn_cast <gcall *> (gs);
2654 if (!stmt)
ebfd146a
IR
2655 return false;
2656
5ce9450f
JJ
2657 if (gimple_call_internal_p (stmt)
2658 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2659 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2660 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2661 slp_node);
2662
0136f8f0
AH
2663 if (gimple_call_lhs (stmt) == NULL_TREE
2664 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2665 return false;
2666
0136f8f0 2667 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2668
b690cc0f
RG
2669 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2670
ebfd146a
IR
2671 /* Process function arguments. */
2672 rhs_type = NULL_TREE;
b690cc0f 2673 vectype_in = NULL_TREE;
ebfd146a
IR
2674 nargs = gimple_call_num_args (stmt);
2675
1b1562a5
MM
 2676 /* Bail out if the function has more than three arguments; we do not have
 2677 interesting builtin functions to vectorize with more than two arguments
 2678 except for fma. Having no arguments is not good either. */
2679 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2680 return false;
2681
74bf76ed
JJ
 2682 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2683 if (gimple_call_internal_p (stmt)
2684 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2685 {
2686 nargs = 0;
2687 rhs_type = unsigned_type_node;
2688 }
2689
ebfd146a
IR
2690 for (i = 0; i < nargs; i++)
2691 {
b690cc0f
RG
2692 tree opvectype;
2693
ebfd146a
IR
2694 op = gimple_call_arg (stmt, i);
2695
2696 /* We can only handle calls with arguments of the same type. */
2697 if (rhs_type
8533c9d8 2698 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2699 {
73fbfcad 2700 if (dump_enabled_p ())
78c60e3d 2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2702 "argument types differ.\n");
ebfd146a
IR
2703 return false;
2704 }
b690cc0f
RG
2705 if (!rhs_type)
2706 rhs_type = TREE_TYPE (op);
ebfd146a 2707
81c40241 2708 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 2709 {
73fbfcad 2710 if (dump_enabled_p ())
78c60e3d 2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2712 "use not simple.\n");
ebfd146a
IR
2713 return false;
2714 }
ebfd146a 2715
b690cc0f
RG
2716 if (!vectype_in)
2717 vectype_in = opvectype;
2718 else if (opvectype
2719 && opvectype != vectype_in)
2720 {
73fbfcad 2721 if (dump_enabled_p ())
78c60e3d 2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2723 "argument vector types differ.\n");
b690cc0f
RG
2724 return false;
2725 }
2726 }
2727 /* If all arguments are external or constant defs use a vector type with
2728 the same size as the output vector type. */
ebfd146a 2729 if (!vectype_in)
b690cc0f 2730 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2731 if (vec_stmt)
2732 gcc_assert (vectype_in);
2733 if (!vectype_in)
2734 {
73fbfcad 2735 if (dump_enabled_p ())
7d8930a0 2736 {
78c60e3d
SS
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 "no vectype for scalar type ");
2739 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2740 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2741 }
2742
2743 return false;
2744 }
ebfd146a
IR
2745
2746 /* FORNOW */
b690cc0f
RG
2747 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2748 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
2749 if (nunits_in == nunits_out / 2)
2750 modifier = NARROW;
2751 else if (nunits_out == nunits_in)
2752 modifier = NONE;
2753 else if (nunits_out == nunits_in / 2)
2754 modifier = WIDEN;
2755 else
2756 return false;
2757
70439f0d
RS
2758 /* We only handle functions that do not read or clobber memory. */
2759 if (gimple_vuse (stmt))
2760 {
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 "function reads from or writes to memory.\n");
2764 return false;
2765 }
2766
ebfd146a
IR
2767 /* For now, we only vectorize functions if a target specific builtin
2768 is available. TODO -- in some cases, it might be profitable to
2769 insert the calls for pieces of the vector, in order to be able
2770 to vectorize other operations in the loop. */
70439f0d
RS
2771 fndecl = NULL_TREE;
2772 internal_fn ifn = IFN_LAST;
2773 combined_fn cfn = gimple_call_combined_fn (stmt);
2774 tree callee = gimple_call_fndecl (stmt);
2775
2776 /* First try using an internal function. */
b1b6836e
RS
2777 tree_code convert_code = ERROR_MARK;
2778 if (cfn != CFN_LAST
2779 && (modifier == NONE
2780 || (modifier == NARROW
2781 && simple_integer_narrowing (vectype_out, vectype_in,
2782 &convert_code))))
70439f0d
RS
2783 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2784 vectype_in);
2785
2786 /* If that fails, try asking for a target-specific built-in function. */
2787 if (ifn == IFN_LAST)
2788 {
2789 if (cfn != CFN_LAST)
2790 fndecl = targetm.vectorize.builtin_vectorized_function
2791 (cfn, vectype_out, vectype_in);
2792 else
2793 fndecl = targetm.vectorize.builtin_md_vectorized_function
2794 (callee, vectype_out, vectype_in);
2795 }
2796
2797 if (ifn == IFN_LAST && !fndecl)
ebfd146a 2798 {
70439f0d 2799 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
2800 && !slp_node
2801 && loop_vinfo
2802 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2803 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2804 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2805 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2806 {
2807 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2808 { 0, 1, 2, ... vf - 1 } vector. */
2809 gcc_assert (nargs == 0);
2810 }
37b14185
RB
2811 else if (modifier == NONE
2812 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2813 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2814 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2815 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2816 vectype_in, dt);
74bf76ed
JJ
2817 else
2818 {
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2821 "function is not vectorizable.\n");
74bf76ed
JJ
2822 return false;
2823 }
ebfd146a
IR
2824 }
2825
fce57248 2826 if (slp_node)
190c2236 2827 ncopies = 1;
b1b6836e 2828 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 2829 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 2830 else
e8f142e2 2831 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
2832
2833 /* Sanity check: make sure that at least one copy of the vectorized stmt
2834 needs to be generated. */
2835 gcc_assert (ncopies >= 1);
2836
2837 if (!vec_stmt) /* transformation not required. */
2838 {
2839 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2840 if (dump_enabled_p ())
e645e942
TJ
2841 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2842 "\n");
4fc5ebf1 2843 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
b1b6836e
RS
2844 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2845 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2846 vec_promote_demote, stmt_info, 0, vect_body);
2847
ebfd146a
IR
2848 return true;
2849 }
2850
67b8dbac 2851 /* Transform. */
ebfd146a 2852
73fbfcad 2853 if (dump_enabled_p ())
e645e942 2854 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2855
2856 /* Handle def. */
2857 scalar_dest = gimple_call_lhs (stmt);
2858 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2859
2860 prev_stmt_info = NULL;
b1b6836e 2861 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 2862 {
b1b6836e 2863 tree prev_res = NULL_TREE;
ebfd146a
IR
2864 for (j = 0; j < ncopies; ++j)
2865 {
2866 /* Build argument list for the vectorized call. */
2867 if (j == 0)
9771b263 2868 vargs.create (nargs);
ebfd146a 2869 else
9771b263 2870 vargs.truncate (0);
ebfd146a 2871
190c2236
JJ
2872 if (slp_node)
2873 {
ef062b13 2874 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2875 vec<tree> vec_oprnds0;
190c2236
JJ
2876
2877 for (i = 0; i < nargs; i++)
9771b263 2878 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2879 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2880 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2881
2882 /* Arguments are ready. Create the new vector stmt. */
9771b263 2883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2884 {
2885 size_t k;
2886 for (k = 0; k < nargs; k++)
2887 {
37b5ec8f 2888 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2889 vargs[k] = vec_oprndsk[i];
190c2236 2890 }
b1b6836e
RS
2891 if (modifier == NARROW)
2892 {
2893 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2894 gcall *call
2895 = gimple_build_call_internal_vec (ifn, vargs);
2896 gimple_call_set_lhs (call, half_res);
2897 gimple_call_set_nothrow (call, true);
2898 new_stmt = call;
b1b6836e
RS
2899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2900 if ((i & 1) == 0)
2901 {
2902 prev_res = half_res;
2903 continue;
2904 }
2905 new_temp = make_ssa_name (vec_dest);
2906 new_stmt = gimple_build_assign (new_temp, convert_code,
2907 prev_res, half_res);
2908 }
70439f0d 2909 else
b1b6836e 2910 {
a844293d 2911 gcall *call;
b1b6836e 2912 if (ifn != IFN_LAST)
a844293d 2913 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 2914 else
a844293d
RS
2915 call = gimple_build_call_vec (fndecl, vargs);
2916 new_temp = make_ssa_name (vec_dest, call);
2917 gimple_call_set_lhs (call, new_temp);
2918 gimple_call_set_nothrow (call, true);
2919 new_stmt = call;
b1b6836e 2920 }
190c2236 2921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2922 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2923 }
2924
2925 for (i = 0; i < nargs; i++)
2926 {
37b5ec8f 2927 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2928 vec_oprndsi.release ();
190c2236 2929 }
190c2236
JJ
2930 continue;
2931 }
2932
ebfd146a
IR
2933 for (i = 0; i < nargs; i++)
2934 {
2935 op = gimple_call_arg (stmt, i);
2936 if (j == 0)
2937 vec_oprnd0
81c40241 2938 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 2939 else
63827fb8
IR
2940 {
2941 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2942 vec_oprnd0
2943 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2944 }
ebfd146a 2945
9771b263 2946 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2947 }
2948
74bf76ed
JJ
2949 if (gimple_call_internal_p (stmt)
2950 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2951 {
5ebaa477
RS
2952 tree_vector_builder v (vectype_out, 1, 3);
2953 for (int k = 0; k < 3; ++k)
794e3180
RS
2954 v.quick_push (build_int_cst (unsigned_type_node,
2955 j * nunits_out + k));
5ebaa477 2956 tree cst = v.build ();
74bf76ed 2957 tree new_var
0e22bb5a 2958 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 2959 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 2960 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2961 new_temp = make_ssa_name (vec_dest);
0e22bb5a 2962 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 2963 }
b1b6836e
RS
2964 else if (modifier == NARROW)
2965 {
2966 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2967 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2968 gimple_call_set_lhs (call, half_res);
2969 gimple_call_set_nothrow (call, true);
2970 new_stmt = call;
b1b6836e
RS
2971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2972 if ((j & 1) == 0)
2973 {
2974 prev_res = half_res;
2975 continue;
2976 }
2977 new_temp = make_ssa_name (vec_dest);
2978 new_stmt = gimple_build_assign (new_temp, convert_code,
2979 prev_res, half_res);
2980 }
74bf76ed
JJ
2981 else
2982 {
a844293d 2983 gcall *call;
70439f0d 2984 if (ifn != IFN_LAST)
a844293d 2985 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 2986 else
a844293d 2987 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 2988 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
2989 gimple_call_set_lhs (call, new_temp);
2990 gimple_call_set_nothrow (call, true);
2991 new_stmt = call;
74bf76ed 2992 }
ebfd146a
IR
2993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2994
b1b6836e 2995 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
2996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2997 else
2998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2999
3000 prev_stmt_info = vinfo_for_stmt (new_stmt);
3001 }
b1b6836e
RS
3002 }
3003 else if (modifier == NARROW)
3004 {
ebfd146a
IR
3005 for (j = 0; j < ncopies; ++j)
3006 {
3007 /* Build argument list for the vectorized call. */
3008 if (j == 0)
9771b263 3009 vargs.create (nargs * 2);
ebfd146a 3010 else
9771b263 3011 vargs.truncate (0);
ebfd146a 3012
190c2236
JJ
3013 if (slp_node)
3014 {
ef062b13 3015 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3016 vec<tree> vec_oprnds0;
190c2236
JJ
3017
3018 for (i = 0; i < nargs; i++)
9771b263 3019 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3020 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3021 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3022
3023 /* Arguments are ready. Create the new vector stmt. */
9771b263 3024 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3025 {
3026 size_t k;
9771b263 3027 vargs.truncate (0);
190c2236
JJ
3028 for (k = 0; k < nargs; k++)
3029 {
37b5ec8f 3030 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3031 vargs.quick_push (vec_oprndsk[i]);
3032 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3033 }
a844293d 3034 gcall *call;
70439f0d 3035 if (ifn != IFN_LAST)
a844293d 3036 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3037 else
a844293d
RS
3038 call = gimple_build_call_vec (fndecl, vargs);
3039 new_temp = make_ssa_name (vec_dest, call);
3040 gimple_call_set_lhs (call, new_temp);
3041 gimple_call_set_nothrow (call, true);
3042 new_stmt = call;
190c2236 3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3044 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3045 }
3046
3047 for (i = 0; i < nargs; i++)
3048 {
37b5ec8f 3049 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3050 vec_oprndsi.release ();
190c2236 3051 }
190c2236
JJ
3052 continue;
3053 }
3054
ebfd146a
IR
3055 for (i = 0; i < nargs; i++)
3056 {
3057 op = gimple_call_arg (stmt, i);
3058 if (j == 0)
3059 {
3060 vec_oprnd0
81c40241 3061 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3062 vec_oprnd1
63827fb8 3063 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3064 }
3065 else
3066 {
336ecb65 3067 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3068 vec_oprnd0
63827fb8 3069 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3070 vec_oprnd1
63827fb8 3071 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3072 }
3073
9771b263
DN
3074 vargs.quick_push (vec_oprnd0);
3075 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3076 }
3077
b1b6836e 3078 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3079 new_temp = make_ssa_name (vec_dest, new_stmt);
3080 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3082
3083 if (j == 0)
3084 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3085 else
3086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3087
3088 prev_stmt_info = vinfo_for_stmt (new_stmt);
3089 }
3090
3091 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3092 }
b1b6836e
RS
3093 else
3094 /* No current target implements this case. */
3095 return false;
ebfd146a 3096
9771b263 3097 vargs.release ();
ebfd146a 3098
ebfd146a
IR
3099 /* The call in STMT might prevent it from being removed in dce.
3100 We however cannot remove it here, due to the way the ssa name
3101 it defines is mapped to the new definition. So just replace
3102 rhs of the statement with something harmless. */
3103
dd34c087
JJ
3104 if (slp_node)
3105 return true;
3106
ebfd146a 3107 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3108 if (is_pattern_stmt_p (stmt_info))
3109 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3110 else
3111 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3112
9d5e7640 3113 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3114 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3115 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3116 STMT_VINFO_STMT (stmt_info) = new_stmt;
3117 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3118
3119 return true;
3120}
3121
3122
0136f8f0
AH
3123struct simd_call_arg_info
3124{
3125 tree vectype;
3126 tree op;
0136f8f0 3127 HOST_WIDE_INT linear_step;
34e82342 3128 enum vect_def_type dt;
0136f8f0 3129 unsigned int align;
17b658af 3130 bool simd_lane_linear;
0136f8f0
AH
3131};
3132
17b658af
JJ
3133/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
 3134 is linear within the simd lane (but not within the whole loop), note it in
3135 *ARGINFO. */
3136
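   /* For example (SSA names made up), with simduid_5 an SSA name of
      LOOP->simduid:

	_1 = GOMP_SIMD_LANE (simduid_5);
	_2 = (sizetype) _1;
	_3 = _2 * 4;
	op_4 = &a + _3;

      the walk below strips the POINTER_PLUS_EXPR, the MULT_EXPR and the
      conversion, reaches the GOMP_SIMD_LANE call and records base &a and
      linear_step 4 in *ARGINFO.  */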
3137static void
3138vect_simd_lane_linear (tree op, struct loop *loop,
3139 struct simd_call_arg_info *arginfo)
3140{
355fe088 3141 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3142
3143 if (!is_gimple_assign (def_stmt)
3144 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3145 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3146 return;
3147
3148 tree base = gimple_assign_rhs1 (def_stmt);
3149 HOST_WIDE_INT linear_step = 0;
3150 tree v = gimple_assign_rhs2 (def_stmt);
3151 while (TREE_CODE (v) == SSA_NAME)
3152 {
3153 tree t;
3154 def_stmt = SSA_NAME_DEF_STMT (v);
3155 if (is_gimple_assign (def_stmt))
3156 switch (gimple_assign_rhs_code (def_stmt))
3157 {
3158 case PLUS_EXPR:
3159 t = gimple_assign_rhs2 (def_stmt);
3160 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3161 return;
3162 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3163 v = gimple_assign_rhs1 (def_stmt);
3164 continue;
3165 case MULT_EXPR:
3166 t = gimple_assign_rhs2 (def_stmt);
3167 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3168 return;
3169 linear_step = tree_to_shwi (t);
3170 v = gimple_assign_rhs1 (def_stmt);
3171 continue;
3172 CASE_CONVERT:
3173 t = gimple_assign_rhs1 (def_stmt);
3174 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3175 || (TYPE_PRECISION (TREE_TYPE (v))
3176 < TYPE_PRECISION (TREE_TYPE (t))))
3177 return;
3178 if (!linear_step)
3179 linear_step = 1;
3180 v = t;
3181 continue;
3182 default:
3183 return;
3184 }
8e4284d0 3185 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3186 && loop->simduid
3187 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3188 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3189 == loop->simduid))
3190 {
3191 if (!linear_step)
3192 linear_step = 1;
3193 arginfo->linear_step = linear_step;
3194 arginfo->op = base;
3195 arginfo->simd_lane_linear = true;
3196 return;
3197 }
3198 }
3199}
3200
0136f8f0
AH
3201/* Function vectorizable_simd_clone_call.
3202
3203 Check if STMT performs a function call that can be vectorized
3204 by calling a simd clone of the function.
3205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 3206 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3208
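/* For example (names and target mangling illustrative, x86-ish): given

	#pragma omp declare simd notinbranch
	float foo (float x);

   and a call 'y_1 = foo (x_2);' in a loop vectorized with VF 8, the code
   below may pick a clone such as _ZGVbN4v_foo (simdlen 4, one vector
   argument) and emit two vector calls per scalar call, each taking a V4SF
   argument and producing a V4SF result.  */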
3209static bool
355fe088
TS
3210vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3211 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3212{
3213 tree vec_dest;
3214 tree scalar_dest;
3215 tree op, type;
3216 tree vec_oprnd0 = NULL_TREE;
3217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3218 tree vectype;
3219 unsigned int nunits;
3220 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3221 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3222 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3223 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3224 tree fndecl, new_temp;
355fe088
TS
3225 gimple *def_stmt;
3226 gimple *new_stmt = NULL;
0136f8f0 3227 int ncopies, j;
00426f9a 3228 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3229 vec<tree> vargs = vNULL;
3230 size_t i, nargs;
3231 tree lhs, rtype, ratype;
e7a74006 3232 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3233
3234 /* Is STMT a vectorizable call? */
3235 if (!is_gimple_call (stmt))
3236 return false;
3237
3238 fndecl = gimple_call_fndecl (stmt);
3239 if (fndecl == NULL_TREE)
3240 return false;
3241
d52f5295 3242 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3243 if (node == NULL || node->simd_clones == NULL)
3244 return false;
3245
3246 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3247 return false;
3248
66c16fd9
RB
3249 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3250 && ! vec_stmt)
0136f8f0
AH
3251 return false;
3252
3253 if (gimple_call_lhs (stmt)
3254 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3255 return false;
3256
3257 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3258
3259 vectype = STMT_VINFO_VECTYPE (stmt_info);
3260
3261 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3262 return false;
3263
3264 /* FORNOW */
fce57248 3265 if (slp_node)
0136f8f0
AH
3266 return false;
3267
3268 /* Process function arguments. */
3269 nargs = gimple_call_num_args (stmt);
3270
3271 /* Bail out if the function has zero arguments. */
3272 if (nargs == 0)
3273 return false;
3274
00426f9a 3275 arginfo.reserve (nargs, true);
0136f8f0
AH
3276
3277 for (i = 0; i < nargs; i++)
3278 {
3279 simd_call_arg_info thisarginfo;
3280 affine_iv iv;
3281
3282 thisarginfo.linear_step = 0;
3283 thisarginfo.align = 0;
3284 thisarginfo.op = NULL_TREE;
17b658af 3285 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3286
3287 op = gimple_call_arg (stmt, i);
81c40241
RB
3288 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3289 &thisarginfo.vectype)
0136f8f0
AH
3290 || thisarginfo.dt == vect_uninitialized_def)
3291 {
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "use not simple.\n");
0136f8f0
AH
3295 return false;
3296 }
3297
3298 if (thisarginfo.dt == vect_constant_def
3299 || thisarginfo.dt == vect_external_def)
3300 gcc_assert (thisarginfo.vectype == NULL_TREE);
3301 else
3302 gcc_assert (thisarginfo.vectype != NULL_TREE);
3303
6c9e85fb
JJ
3304 /* For linear arguments, the analyze phase should have saved
3305 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
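	 /* Layout of STMT_VINFO_SIMD_CLONE_INFO as filled in by the analysis
	    code further down: slot 0 holds the selected clone's decl, and each
	    linear argument I uses slots I*3 + 1 (base), I*3 + 2 (step) and
	    I*3 + 3 (simd-lane-linear flag); slots of other arguments are left
	    cleared.  */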
17b658af
JJ
3306 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3307 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3308 {
3309 gcc_assert (vec_stmt);
3310 thisarginfo.linear_step
17b658af 3311 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3312 thisarginfo.op
17b658af
JJ
3313 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3314 thisarginfo.simd_lane_linear
3315 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3316 == boolean_true_node);
6c9e85fb
JJ
3317 /* If loop has been peeled for alignment, we need to adjust it. */
3318 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3319 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3320 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3321 {
3322 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3323 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3324 tree opt = TREE_TYPE (thisarginfo.op);
3325 bias = fold_convert (TREE_TYPE (step), bias);
3326 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3327 thisarginfo.op
3328 = fold_build2 (POINTER_TYPE_P (opt)
3329 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3330 thisarginfo.op, bias);
3331 }
3332 }
3333 else if (!vec_stmt
3334 && thisarginfo.dt != vect_constant_def
3335 && thisarginfo.dt != vect_external_def
3336 && loop_vinfo
3337 && TREE_CODE (op) == SSA_NAME
3338 && simple_iv (loop, loop_containing_stmt (stmt), op,
3339 &iv, false)
3340 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3341 {
3342 thisarginfo.linear_step = tree_to_shwi (iv.step);
3343 thisarginfo.op = iv.base;
3344 }
3345 else if ((thisarginfo.dt == vect_constant_def
3346 || thisarginfo.dt == vect_external_def)
3347 && POINTER_TYPE_P (TREE_TYPE (op)))
3348 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3349 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3350 linear too. */
3351 if (POINTER_TYPE_P (TREE_TYPE (op))
3352 && !thisarginfo.linear_step
3353 && !vec_stmt
3354 && thisarginfo.dt != vect_constant_def
3355 && thisarginfo.dt != vect_external_def
3356 && loop_vinfo
3357 && !slp_node
3358 && TREE_CODE (op) == SSA_NAME)
3359 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3360
3361 arginfo.quick_push (thisarginfo);
3362 }
3363
d9f21f6a
RS
3364 unsigned HOST_WIDE_INT vf;
3365 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3366 {
3367 if (dump_enabled_p ())
3368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3369 "not considering SIMD clones; not yet supported"
3370 " for variable-width vectors.\n");
 3371 return false;
3372 }
3373
0136f8f0
AH
3374 unsigned int badness = 0;
3375 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3376 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3377 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3378 else
3379 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3380 n = n->simdclone->next_clone)
3381 {
3382 unsigned int this_badness = 0;
d9f21f6a 3383 if (n->simdclone->simdlen > vf
0136f8f0
AH
3384 || n->simdclone->nargs != nargs)
3385 continue;
d9f21f6a
RS
3386 if (n->simdclone->simdlen < vf)
3387 this_badness += (exact_log2 (vf)
0136f8f0
AH
3388 - exact_log2 (n->simdclone->simdlen)) * 1024;
3389 if (n->simdclone->inbranch)
3390 this_badness += 2048;
3391 int target_badness = targetm.simd_clone.usable (n);
3392 if (target_badness < 0)
3393 continue;
3394 this_badness += target_badness * 512;
3395 /* FORNOW: Have to add code to add the mask argument. */
3396 if (n->simdclone->inbranch)
3397 continue;
3398 for (i = 0; i < nargs; i++)
3399 {
3400 switch (n->simdclone->args[i].arg_type)
3401 {
3402 case SIMD_CLONE_ARG_TYPE_VECTOR:
3403 if (!useless_type_conversion_p
3404 (n->simdclone->args[i].orig_type,
3405 TREE_TYPE (gimple_call_arg (stmt, i))))
3406 i = -1;
3407 else if (arginfo[i].dt == vect_constant_def
3408 || arginfo[i].dt == vect_external_def
3409 || arginfo[i].linear_step)
3410 this_badness += 64;
3411 break;
3412 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3413 if (arginfo[i].dt != vect_constant_def
3414 && arginfo[i].dt != vect_external_def)
3415 i = -1;
3416 break;
3417 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3418 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3419 if (arginfo[i].dt == vect_constant_def
3420 || arginfo[i].dt == vect_external_def
3421 || (arginfo[i].linear_step
3422 != n->simdclone->args[i].linear_step))
3423 i = -1;
3424 break;
3425 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3426 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3427 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3428 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3429 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3430 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3431 /* FORNOW */
3432 i = -1;
3433 break;
3434 case SIMD_CLONE_ARG_TYPE_MASK:
3435 gcc_unreachable ();
3436 }
3437 if (i == (size_t) -1)
3438 break;
3439 if (n->simdclone->args[i].alignment > arginfo[i].align)
3440 {
3441 i = -1;
3442 break;
3443 }
3444 if (arginfo[i].align)
3445 this_badness += (exact_log2 (arginfo[i].align)
3446 - exact_log2 (n->simdclone->args[i].alignment));
3447 }
3448 if (i == (size_t) -1)
3449 continue;
3450 if (bestn == NULL || this_badness < badness)
3451 {
3452 bestn = n;
3453 badness = this_badness;
3454 }
3455 }
3456
3457 if (bestn == NULL)
00426f9a 3458 return false;
0136f8f0
AH
3459
3460 for (i = 0; i < nargs; i++)
3461 if ((arginfo[i].dt == vect_constant_def
3462 || arginfo[i].dt == vect_external_def)
3463 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3464 {
3465 arginfo[i].vectype
3466 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3467 i)));
3468 if (arginfo[i].vectype == NULL
3469 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3470 > bestn->simdclone->simdlen))
00426f9a 3471 return false;
0136f8f0
AH
3472 }
3473
3474 fndecl = bestn->decl;
3475 nunits = bestn->simdclone->simdlen;
d9f21f6a 3476 ncopies = vf / nunits;
0136f8f0
AH
3477
 3478 /* If the function isn't const, only allow it in simd loops where the user
3479 has asserted that at least nunits consecutive iterations can be
3480 performed using SIMD instructions. */
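  /* For instance, a '#pragma omp simd safelen(N)' on the enclosing loop sets
     LOOP->safelen to N; without such an assertion a non-const clone could
     observe side effects in a different order than the scalar loop.  */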
3481 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3482 && gimple_vuse (stmt))
00426f9a 3483 return false;
0136f8f0
AH
3484
3485 /* Sanity check: make sure that at least one copy of the vectorized stmt
3486 needs to be generated. */
3487 gcc_assert (ncopies >= 1);
3488
3489 if (!vec_stmt) /* transformation not required. */
3490 {
6c9e85fb
JJ
3491 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3492 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3493 if ((bestn->simdclone->args[i].arg_type
3494 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3495 || (bestn->simdclone->args[i].arg_type
3496 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3497 {
17b658af 3498 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3499 + 1);
3500 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3501 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3502 ? size_type_node : TREE_TYPE (arginfo[i].op);
3503 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3504 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3505 tree sll = arginfo[i].simd_lane_linear
3506 ? boolean_true_node : boolean_false_node;
3507 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3508 }
0136f8f0
AH
3509 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3510 if (dump_enabled_p ())
3511 dump_printf_loc (MSG_NOTE, vect_location,
3512 "=== vectorizable_simd_clone_call ===\n");
3513/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3514 return true;
3515 }
3516
67b8dbac 3517 /* Transform. */
0136f8f0
AH
3518
3519 if (dump_enabled_p ())
3520 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3521
3522 /* Handle def. */
3523 scalar_dest = gimple_call_lhs (stmt);
3524 vec_dest = NULL_TREE;
3525 rtype = NULL_TREE;
3526 ratype = NULL_TREE;
3527 if (scalar_dest)
3528 {
3529 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3530 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3531 if (TREE_CODE (rtype) == ARRAY_TYPE)
3532 {
3533 ratype = rtype;
3534 rtype = TREE_TYPE (ratype);
3535 }
3536 }
3537
3538 prev_stmt_info = NULL;
3539 for (j = 0; j < ncopies; ++j)
3540 {
3541 /* Build argument list for the vectorized call. */
3542 if (j == 0)
3543 vargs.create (nargs);
3544 else
3545 vargs.truncate (0);
3546
3547 for (i = 0; i < nargs; i++)
3548 {
3549 unsigned int k, l, m, o;
3550 tree atype;
3551 op = gimple_call_arg (stmt, i);
3552 switch (bestn->simdclone->args[i].arg_type)
3553 {
3554 case SIMD_CLONE_ARG_TYPE_VECTOR:
3555 atype = bestn->simdclone->args[i].vector_type;
3556 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3557 for (m = j * o; m < (j + 1) * o; m++)
3558 {
3559 if (TYPE_VECTOR_SUBPARTS (atype)
3560 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3561 {
3562 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3563 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3564 / TYPE_VECTOR_SUBPARTS (atype));
3565 gcc_assert ((k & (k - 1)) == 0);
3566 if (m == 0)
3567 vec_oprnd0
81c40241 3568 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3569 else
3570 {
3571 vec_oprnd0 = arginfo[i].op;
3572 if ((m & (k - 1)) == 0)
3573 vec_oprnd0
3574 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3575 vec_oprnd0);
3576 }
3577 arginfo[i].op = vec_oprnd0;
3578 vec_oprnd0
3579 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3580 bitsize_int (prec),
0136f8f0
AH
3581 bitsize_int ((m & (k - 1)) * prec));
3582 new_stmt
b731b390 3583 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3584 vec_oprnd0);
3585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3586 vargs.safe_push (gimple_assign_lhs (new_stmt));
3587 }
3588 else
3589 {
3590 k = (TYPE_VECTOR_SUBPARTS (atype)
3591 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3592 gcc_assert ((k & (k - 1)) == 0);
3593 vec<constructor_elt, va_gc> *ctor_elts;
3594 if (k != 1)
3595 vec_alloc (ctor_elts, k);
3596 else
3597 ctor_elts = NULL;
3598 for (l = 0; l < k; l++)
3599 {
3600 if (m == 0 && l == 0)
3601 vec_oprnd0
81c40241 3602 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3603 else
3604 vec_oprnd0
3605 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3606 arginfo[i].op);
3607 arginfo[i].op = vec_oprnd0;
3608 if (k == 1)
3609 break;
3610 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3611 vec_oprnd0);
3612 }
3613 if (k == 1)
3614 vargs.safe_push (vec_oprnd0);
3615 else
3616 {
3617 vec_oprnd0 = build_constructor (atype, ctor_elts);
3618 new_stmt
b731b390 3619 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3620 vec_oprnd0);
3621 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3622 vargs.safe_push (gimple_assign_lhs (new_stmt));
3623 }
3624 }
3625 }
3626 break;
3627 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3628 vargs.safe_push (op);
3629 break;
3630 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3631 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3632 if (j == 0)
3633 {
3634 gimple_seq stmts;
3635 arginfo[i].op
3636 = force_gimple_operand (arginfo[i].op, &stmts, true,
3637 NULL_TREE);
3638 if (stmts != NULL)
3639 {
3640 basic_block new_bb;
3641 edge pe = loop_preheader_edge (loop);
3642 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3643 gcc_assert (!new_bb);
3644 }
17b658af
JJ
3645 if (arginfo[i].simd_lane_linear)
3646 {
3647 vargs.safe_push (arginfo[i].op);
3648 break;
3649 }
b731b390 3650 tree phi_res = copy_ssa_name (op);
538dd0b7 3651 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3652 set_vinfo_for_stmt (new_phi,
310213d4 3653 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3654 add_phi_arg (new_phi, arginfo[i].op,
3655 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3656 enum tree_code code
3657 = POINTER_TYPE_P (TREE_TYPE (op))
3658 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3659 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3660 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3661 widest_int cst
3662 = wi::mul (bestn->simdclone->args[i].linear_step,
3663 ncopies * nunits);
3664 tree tcst = wide_int_to_tree (type, cst);
b731b390 3665 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3666 new_stmt
3667 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3668 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3669 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3670 set_vinfo_for_stmt (new_stmt,
310213d4 3671 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
3672 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3673 UNKNOWN_LOCATION);
3674 arginfo[i].op = phi_res;
3675 vargs.safe_push (phi_res);
3676 }
3677 else
3678 {
3679 enum tree_code code
3680 = POINTER_TYPE_P (TREE_TYPE (op))
3681 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3682 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3683 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3684 widest_int cst
3685 = wi::mul (bestn->simdclone->args[i].linear_step,
3686 j * nunits);
3687 tree tcst = wide_int_to_tree (type, cst);
b731b390 3688 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3689 new_stmt = gimple_build_assign (new_temp, code,
3690 arginfo[i].op, tcst);
0136f8f0
AH
3691 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3692 vargs.safe_push (new_temp);
3693 }
3694 break;
7adb26f2
JJ
3695 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3696 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3697 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
3698 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3699 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3700 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3701 default:
3702 gcc_unreachable ();
3703 }
3704 }
3705
3706 new_stmt = gimple_build_call_vec (fndecl, vargs);
3707 if (vec_dest)
3708 {
3709 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3710 if (ratype)
b731b390 3711 new_temp = create_tmp_var (ratype);
0136f8f0
AH
3712 else if (TYPE_VECTOR_SUBPARTS (vectype)
3713 == TYPE_VECTOR_SUBPARTS (rtype))
3714 new_temp = make_ssa_name (vec_dest, new_stmt);
3715 else
3716 new_temp = make_ssa_name (rtype, new_stmt);
3717 gimple_call_set_lhs (new_stmt, new_temp);
3718 }
3719 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3720
3721 if (vec_dest)
3722 {
3723 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3724 {
3725 unsigned int k, l;
3726 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3727 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3728 gcc_assert ((k & (k - 1)) == 0);
3729 for (l = 0; l < k; l++)
3730 {
3731 tree t;
3732 if (ratype)
3733 {
3734 t = build_fold_addr_expr (new_temp);
3735 t = build2 (MEM_REF, vectype, t,
3736 build_int_cst (TREE_TYPE (t),
3737 l * prec / BITS_PER_UNIT));
3738 }
3739 else
3740 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 3741 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 3742 new_stmt
b731b390 3743 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3744 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3745 if (j == 0 && l == 0)
3746 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3747 else
3748 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3749
3750 prev_stmt_info = vinfo_for_stmt (new_stmt);
3751 }
3752
3753 if (ratype)
3754 {
3755 tree clobber = build_constructor (ratype, NULL);
3756 TREE_THIS_VOLATILE (clobber) = 1;
3757 new_stmt = gimple_build_assign (new_temp, clobber);
3758 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3759 }
3760 continue;
3761 }
3762 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3763 {
3764 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3765 / TYPE_VECTOR_SUBPARTS (rtype));
3766 gcc_assert ((k & (k - 1)) == 0);
3767 if ((j & (k - 1)) == 0)
3768 vec_alloc (ret_ctor_elts, k);
3769 if (ratype)
3770 {
3771 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3772 for (m = 0; m < o; m++)
3773 {
3774 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3775 size_int (m), NULL_TREE, NULL_TREE);
3776 new_stmt
b731b390 3777 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3778 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3779 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3780 gimple_assign_lhs (new_stmt));
3781 }
3782 tree clobber = build_constructor (ratype, NULL);
3783 TREE_THIS_VOLATILE (clobber) = 1;
3784 new_stmt = gimple_build_assign (new_temp, clobber);
3785 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3786 }
3787 else
3788 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3789 if ((j & (k - 1)) != k - 1)
3790 continue;
3791 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3792 new_stmt
b731b390 3793 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3794 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3795
3796 if ((unsigned) j == k - 1)
3797 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3798 else
3799 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3800
3801 prev_stmt_info = vinfo_for_stmt (new_stmt);
3802 continue;
3803 }
3804 else if (ratype)
3805 {
3806 tree t = build_fold_addr_expr (new_temp);
3807 t = build2 (MEM_REF, vectype, t,
3808 build_int_cst (TREE_TYPE (t), 0));
3809 new_stmt
b731b390 3810 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3811 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3812 tree clobber = build_constructor (ratype, NULL);
3813 TREE_THIS_VOLATILE (clobber) = 1;
3814 vect_finish_stmt_generation (stmt,
3815 gimple_build_assign (new_temp,
3816 clobber), gsi);
3817 }
3818 }
3819
3820 if (j == 0)
3821 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3822 else
3823 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3824
3825 prev_stmt_info = vinfo_for_stmt (new_stmt);
3826 }
3827
3828 vargs.release ();
3829
3830 /* The call in STMT might prevent it from being removed in dce.
3831 We however cannot remove it here, due to the way the ssa name
3832 it defines is mapped to the new definition. So just replace
3833 rhs of the statement with something harmless. */
3834
3835 if (slp_node)
3836 return true;
3837
3838 if (scalar_dest)
3839 {
3840 type = TREE_TYPE (scalar_dest);
3841 if (is_pattern_stmt_p (stmt_info))
3842 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3843 else
3844 lhs = gimple_call_lhs (stmt);
3845 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3846 }
3847 else
3848 new_stmt = gimple_build_nop ();
3849 set_vinfo_for_stmt (new_stmt, stmt_info);
3850 set_vinfo_for_stmt (stmt, NULL);
3851 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3852 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3853 unlink_stmt_vdef (stmt);
3854
3855 return true;
3856}
3857
3858
ebfd146a
IR
3859/* Function vect_gen_widened_results_half
3860
 3861 Create a vector stmt whose code is CODE, whose number of operands is given
b8698a0f 3862 by OP_TYPE, and whose result variable (and hence type) is VEC_DEST; its
ff802fa1 3863 arguments are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
ebfd146a
IR
3864 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3865 needs to be created (DECL is a function-decl of a target-builtin).
3866 STMT is the original scalar stmt that we are vectorizing. */
3867
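/* For instance, widening a V8HI operand into V4SI results takes two calls to
   this function, one with CODE == VEC_UNPACK_LO_EXPR and one with
   CODE == VEC_UNPACK_HI_EXPR, each producing one V4SI half; see
   vect_create_vectorized_promotion_stmts below.  */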
355fe088 3868static gimple *
ebfd146a
IR
3869vect_gen_widened_results_half (enum tree_code code,
3870 tree decl,
3871 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3872 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3873 gimple *stmt)
b8698a0f 3874{
355fe088 3875 gimple *new_stmt;
b8698a0f
L
3876 tree new_temp;
3877
3878 /* Generate half of the widened result: */
3879 if (code == CALL_EXPR)
3880 {
3881 /* Target specific support */
ebfd146a
IR
3882 if (op_type == binary_op)
3883 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3884 else
3885 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3886 new_temp = make_ssa_name (vec_dest, new_stmt);
3887 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3888 }
3889 else
ebfd146a 3890 {
b8698a0f
L
3891 /* Generic support */
3892 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3893 if (op_type != binary_op)
3894 vec_oprnd1 = NULL;
0d0e4a03 3895 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3896 new_temp = make_ssa_name (vec_dest, new_stmt);
3897 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3898 }
ebfd146a
IR
3899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3900
ebfd146a
IR
3901 return new_stmt;
3902}
3903
4a00c761
JJ
3904
3905/* Get vectorized definitions for loop-based vectorization. For the first
3906 operand we call vect_get_vec_def_for_operand() (with OPRND containing
 3907 the scalar operand), and for the rest we get a copy with
3908 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3909 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3910 The vectors are collected into VEC_OPRNDS. */
3911
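/* For example, a two-step narrowing conversion calls this with
   MULTI_STEP_CVT == vect_pow2 (1) - 1 == 1 and ends up with four defs in
   VEC_OPRNDS: the vector def of the scalar operand followed by three
   successive stmt copies.  */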
3912static void
355fe088 3913vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3914 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3915{
3916 tree vec_oprnd;
3917
3918 /* Get first vector operand. */
3919 /* All the vector operands except the very first one (that is scalar oprnd)
3920 are stmt copies. */
3921 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3922 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
3923 else
3924 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3925
9771b263 3926 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3927
3928 /* Get second vector operand. */
3929 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3930 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3931
3932 *oprnd = vec_oprnd;
3933
3934 /* For conversion in multiple steps, continue to get operands
3935 recursively. */
3936 if (multi_step_cvt)
3937 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3938}
3939
3940
3941/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3942 For multi-step conversions store the resulting vectors and call the function
3943 recursively. */
3944
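/* For example, with MULTI_STEP_CVT == 1, four V4SI operands in VEC_OPRNDS are
   first packed pairwise into two V8HI vectors, and the recursive call then
   packs those two into the final V16QI with VEC_PACK_TRUNC_EXPR.  */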
3945static void
9771b263 3946vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 3947 int multi_step_cvt, gimple *stmt,
9771b263 3948 vec<tree> vec_dsts,
4a00c761
JJ
3949 gimple_stmt_iterator *gsi,
3950 slp_tree slp_node, enum tree_code code,
3951 stmt_vec_info *prev_stmt_info)
3952{
3953 unsigned int i;
3954 tree vop0, vop1, new_tmp, vec_dest;
355fe088 3955 gimple *new_stmt;
4a00c761
JJ
3956 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3957
9771b263 3958 vec_dest = vec_dsts.pop ();
4a00c761 3959
9771b263 3960 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3961 {
3962 /* Create demotion operation. */
9771b263
DN
3963 vop0 = (*vec_oprnds)[i];
3964 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3965 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
3966 new_tmp = make_ssa_name (vec_dest, new_stmt);
3967 gimple_assign_set_lhs (new_stmt, new_tmp);
3968 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3969
3970 if (multi_step_cvt)
3971 /* Store the resulting vector for next recursive call. */
9771b263 3972 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3973 else
3974 {
3975 /* This is the last step of the conversion sequence. Store the
3976 vectors in SLP_NODE or in vector info of the scalar statement
3977 (or in STMT_VINFO_RELATED_STMT chain). */
3978 if (slp_node)
9771b263 3979 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 3980 else
c689ce1e
RB
3981 {
3982 if (!*prev_stmt_info)
3983 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3984 else
3985 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 3986
c689ce1e
RB
3987 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3988 }
4a00c761
JJ
3989 }
3990 }
3991
3992 /* For multi-step demotion operations we first generate demotion operations
3993 from the source type to the intermediate types, and then combine the
3994 results (stored in VEC_OPRNDS) in demotion operation to the destination
3995 type. */
3996 if (multi_step_cvt)
3997 {
3998 /* At each level of recursion we have half of the operands we had at the
3999 previous level. */
9771b263 4000 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4001 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4002 stmt, vec_dsts, gsi, slp_node,
4003 VEC_PACK_TRUNC_EXPR,
4004 prev_stmt_info);
4005 }
4006
9771b263 4007 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4008}
4009
4010
4011/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4012 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4013 the resulting vectors and call the function recursively. */
4014
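/* For example, each V8HI vector in VEC_OPRNDS0 is widened into a low half and
   a high half (two V4SI vectors) via vect_gen_widened_results_half, so on
   return VEC_OPRNDS0 holds twice as many vectors as on entry; for multi-step
   promotions the caller feeds that result back into this function.  */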
4015static void
9771b263
DN
4016vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4017 vec<tree> *vec_oprnds1,
355fe088 4018 gimple *stmt, tree vec_dest,
4a00c761
JJ
4019 gimple_stmt_iterator *gsi,
4020 enum tree_code code1,
4021 enum tree_code code2, tree decl1,
4022 tree decl2, int op_type)
4023{
4024 int i;
4025 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4026 gimple *new_stmt1, *new_stmt2;
6e1aa848 4027 vec<tree> vec_tmp = vNULL;
4a00c761 4028
9771b263
DN
4029 vec_tmp.create (vec_oprnds0->length () * 2);
4030 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4031 {
4032 if (op_type == binary_op)
9771b263 4033 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4034 else
4035 vop1 = NULL_TREE;
4036
4037 /* Generate the two halves of promotion operation. */
4038 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4039 op_type, vec_dest, gsi, stmt);
4040 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4041 op_type, vec_dest, gsi, stmt);
4042 if (is_gimple_call (new_stmt1))
4043 {
4044 new_tmp1 = gimple_call_lhs (new_stmt1);
4045 new_tmp2 = gimple_call_lhs (new_stmt2);
4046 }
4047 else
4048 {
4049 new_tmp1 = gimple_assign_lhs (new_stmt1);
4050 new_tmp2 = gimple_assign_lhs (new_stmt2);
4051 }
4052
4053 /* Store the results for the next step. */
9771b263
DN
4054 vec_tmp.quick_push (new_tmp1);
4055 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4056 }
4057
689eaba3 4058 vec_oprnds0->release ();
4a00c761
JJ
4059 *vec_oprnds0 = vec_tmp;
4060}
4061
4062
b8698a0f
L
 4063/* Check if STMT performs a conversion operation that can be vectorized.
4064 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4065 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4066 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4067
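/* A rough sketch of the MODIFIER classification below, assuming 128-bit
   vectors: short -> int has a V8HI input and a V4SI output, so
   nunits_in > nunits_out and the conversion is a WIDEN; int -> float keeps
   the number of elements (NONE); int -> short is a NARROW.  A conversion the
   target cannot do directly, e.g. short -> float, may be done in two steps
   through the intermediate CVT_TYPE computed below: first widen the shorts
   to ints, then FLOAT_EXPR the ints to floats.  */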
4068static bool
355fe088
TS
4069vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4070 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4071{
4072 tree vec_dest;
4073 tree scalar_dest;
4a00c761 4074 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4075 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4077 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4078 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4079 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4080 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4081 tree new_temp;
355fe088 4082 gimple *def_stmt;
ebfd146a 4083 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4084 int ndts = 2;
355fe088 4085 gimple *new_stmt = NULL;
ebfd146a
IR
4086 stmt_vec_info prev_stmt_info;
4087 int nunits_in;
4088 int nunits_out;
4089 tree vectype_out, vectype_in;
4a00c761
JJ
4090 int ncopies, i, j;
4091 tree lhs_type, rhs_type;
ebfd146a 4092 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4093 vec<tree> vec_oprnds0 = vNULL;
4094 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4095 tree vop0;
4a00c761 4096 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4097 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4098 int multi_step_cvt = 0;
6e1aa848 4099 vec<tree> interm_types = vNULL;
4a00c761
JJ
4100 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4101 int op_type;
4a00c761 4102 unsigned short fltsz;
ebfd146a
IR
4103
4104 /* Is STMT a vectorizable conversion? */
4105
4a00c761 4106 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4107 return false;
4108
66c16fd9
RB
4109 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4110 && ! vec_stmt)
ebfd146a
IR
4111 return false;
4112
4113 if (!is_gimple_assign (stmt))
4114 return false;
4115
4116 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4117 return false;
4118
4119 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4120 if (!CONVERT_EXPR_CODE_P (code)
4121 && code != FIX_TRUNC_EXPR
4122 && code != FLOAT_EXPR
4123 && code != WIDEN_MULT_EXPR
4124 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4125 return false;
4126
4a00c761
JJ
4127 op_type = TREE_CODE_LENGTH (code);
4128
ebfd146a 4129 /* Check types of lhs and rhs. */
b690cc0f 4130 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4131 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4132 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4133
ebfd146a
IR
4134 op0 = gimple_assign_rhs1 (stmt);
4135 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4136
4137 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4138 && !((INTEGRAL_TYPE_P (lhs_type)
4139 && INTEGRAL_TYPE_P (rhs_type))
4140 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4141 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4142 return false;
4143
e6f5c25d
IE
4144 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4145 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4146 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4147 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4148 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4149 {
73fbfcad 4150 if (dump_enabled_p ())
78c60e3d 4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4152 "type conversion to/from bit-precision unsupported."
4153 "\n");
4a00c761
JJ
4154 return false;
4155 }
4156
b690cc0f 4157 /* Check the operands of the operation. */
81c40241 4158 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4159 {
73fbfcad 4160 if (dump_enabled_p ())
78c60e3d 4161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4162 "use not simple.\n");
b690cc0f
RG
4163 return false;
4164 }
4a00c761
JJ
4165 if (op_type == binary_op)
4166 {
4167 bool ok;
4168
4169 op1 = gimple_assign_rhs2 (stmt);
4170 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4171 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4172 OP1. */
4173 if (CONSTANT_CLASS_P (op0))
81c40241 4174 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4175 else
81c40241 4176 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4177
4178 if (!ok)
4179 {
73fbfcad 4180 if (dump_enabled_p ())
78c60e3d 4181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4182 "use not simple.\n");
4a00c761
JJ
4183 return false;
4184 }
4185 }
4186
b690cc0f
RG
4187 /* If op0 is an external or constant defs use a vector type of
4188 the same size as the output vector type. */
ebfd146a 4189 if (!vectype_in)
b690cc0f 4190 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4191 if (vec_stmt)
4192 gcc_assert (vectype_in);
4193 if (!vectype_in)
4194 {
73fbfcad 4195 if (dump_enabled_p ())
4a00c761 4196 {
78c60e3d
SS
4197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4198 "no vectype for scalar type ");
4199 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4200 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4201 }
7d8930a0
IR
4202
4203 return false;
4204 }
ebfd146a 4205
e6f5c25d
IE
4206 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4207 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4208 {
4209 if (dump_enabled_p ())
4210 {
4211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4212 "can't convert between boolean and non "
4213 "boolean vectors");
4214 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4215 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4216 }
4217
4218 return false;
4219 }
4220
b690cc0f
RG
4221 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4222 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 4223 if (nunits_in < nunits_out)
ebfd146a
IR
4224 modifier = NARROW;
4225 else if (nunits_out == nunits_in)
4226 modifier = NONE;
ebfd146a 4227 else
4a00c761 4228 modifier = WIDEN;
ebfd146a 4229
ff802fa1
IR
4230 /* Multiple types in SLP are handled by creating the appropriate number of
4231 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4232 case of SLP. */
fce57248 4233 if (slp_node)
ebfd146a 4234 ncopies = 1;
4a00c761 4235 else if (modifier == NARROW)
e8f142e2 4236 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4237 else
e8f142e2 4238 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4239
ebfd146a
IR
4240 /* Sanity check: make sure that at least one copy of the vectorized stmt
4241 needs to be generated. */
4242 gcc_assert (ncopies >= 1);
4243
16d22000
RS
4244 bool found_mode = false;
4245 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4246 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4247 opt_scalar_mode rhs_mode_iter;
b397965c 4248
ebfd146a 4249 /* Supportable by target? */
4a00c761 4250 switch (modifier)
ebfd146a 4251 {
4a00c761
JJ
4252 case NONE:
4253 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4254 return false;
4255 if (supportable_convert_operation (code, vectype_out, vectype_in,
4256 &decl1, &code1))
4257 break;
4258 /* FALLTHRU */
4259 unsupported:
73fbfcad 4260 if (dump_enabled_p ())
78c60e3d 4261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4262 "conversion not supported by target.\n");
ebfd146a 4263 return false;
ebfd146a 4264
4a00c761
JJ
4265 case WIDEN:
4266 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4267 &code1, &code2, &multi_step_cvt,
4268 &interm_types))
4a00c761
JJ
4269 {
4270 /* Binary widening operation can only be supported directly by the
4271 architecture. */
4272 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4273 break;
4274 }
4275
4276 if (code != FLOAT_EXPR
b397965c 4277 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4278 goto unsupported;
4279
b397965c 4280 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4281 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4282 {
16d22000 4283 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4284 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4285 break;
4286
4a00c761
JJ
4287 cvt_type
4288 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4289 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4290 if (cvt_type == NULL_TREE)
4291 goto unsupported;
4292
4293 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4294 {
4295 if (!supportable_convert_operation (code, vectype_out,
4296 cvt_type, &decl1, &codecvt1))
4297 goto unsupported;
4298 }
4299 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4300 cvt_type, &codecvt1,
4301 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4302 &interm_types))
4303 continue;
4304 else
4305 gcc_assert (multi_step_cvt == 0);
4306
4307 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4308 vectype_in, &code1, &code2,
4309 &multi_step_cvt, &interm_types))
16d22000
RS
4310 {
4311 found_mode = true;
4312 break;
4313 }
4a00c761
JJ
4314 }
4315
16d22000 4316 if (!found_mode)
4a00c761
JJ
4317 goto unsupported;
4318
4319 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4320 codecvt2 = ERROR_MARK;
4321 else
4322 {
4323 multi_step_cvt++;
9771b263 4324 interm_types.safe_push (cvt_type);
4a00c761
JJ
4325 cvt_type = NULL_TREE;
4326 }
4327 break;
4328
4329 case NARROW:
4330 gcc_assert (op_type == unary_op);
4331 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4332 &code1, &multi_step_cvt,
4333 &interm_types))
4334 break;
4335
4336 if (code != FIX_TRUNC_EXPR
b397965c 4337 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4338 goto unsupported;
4339
4a00c761
JJ
4340 cvt_type
4341 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4342 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4343 if (cvt_type == NULL_TREE)
4344 goto unsupported;
4345 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4346 &decl1, &codecvt1))
4347 goto unsupported;
4348 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4349 &code1, &multi_step_cvt,
4350 &interm_types))
4351 break;
4352 goto unsupported;
4353
4354 default:
4355 gcc_unreachable ();
ebfd146a
IR
4356 }
4357
4358 if (!vec_stmt) /* transformation not required. */
4359 {
73fbfcad 4360 if (dump_enabled_p ())
78c60e3d 4361 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4362 "=== vectorizable_conversion ===\n");
4a00c761 4363 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4364 {
4365 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4fc5ebf1 4366 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4367 }
4a00c761
JJ
4368 else if (modifier == NARROW)
4369 {
4370 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 4371 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4372 }
4373 else
4374 {
4375 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 4376 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4377 }
9771b263 4378 interm_types.release ();
ebfd146a
IR
4379 return true;
4380 }
4381
67b8dbac 4382 /* Transform. */
73fbfcad 4383 if (dump_enabled_p ())
78c60e3d 4384 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4385 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4386
4a00c761
JJ
4387 if (op_type == binary_op)
4388 {
4389 if (CONSTANT_CLASS_P (op0))
4390 op0 = fold_convert (TREE_TYPE (op1), op0);
4391 else if (CONSTANT_CLASS_P (op1))
4392 op1 = fold_convert (TREE_TYPE (op0), op1);
4393 }
4394
4395 /* In case of multi-step conversion, we first generate conversion operations
 4396 to the intermediate types, and then from those types to the final one.
4397 We create vector destinations for the intermediate type (TYPES) received
4398 from supportable_*_operation, and store them in the correct order
4399 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4400 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4401 vec_dest = vect_create_destination_var (scalar_dest,
4402 (cvt_type && modifier == WIDEN)
4403 ? cvt_type : vectype_out);
9771b263 4404 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4405
4406 if (multi_step_cvt)
4407 {
9771b263
DN
4408 for (i = interm_types.length () - 1;
4409 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4410 {
4411 vec_dest = vect_create_destination_var (scalar_dest,
4412 intermediate_type);
9771b263 4413 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4414 }
4415 }
ebfd146a 4416
4a00c761 4417 if (cvt_type)
82294ec1
JJ
4418 vec_dest = vect_create_destination_var (scalar_dest,
4419 modifier == WIDEN
4420 ? vectype_out : cvt_type);
4a00c761
JJ
4421
4422 if (!slp_node)
4423 {
30862efc 4424 if (modifier == WIDEN)
4a00c761 4425 {
c3284718 4426 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4427 if (op_type == binary_op)
9771b263 4428 vec_oprnds1.create (1);
4a00c761 4429 }
30862efc 4430 else if (modifier == NARROW)
9771b263
DN
4431 vec_oprnds0.create (
4432 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4433 }
4434 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4435 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4436
4a00c761 4437 last_oprnd = op0;
ebfd146a
IR
4438 prev_stmt_info = NULL;
4439 switch (modifier)
4440 {
4441 case NONE:
4442 for (j = 0; j < ncopies; j++)
4443 {
ebfd146a 4444 if (j == 0)
306b0c92 4445 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4446 else
4447 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4448
9771b263 4449 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4450 {
4451 /* Arguments are ready, create the new vector stmt. */
4452 if (code1 == CALL_EXPR)
4453 {
4454 new_stmt = gimple_build_call (decl1, 1, vop0);
4455 new_temp = make_ssa_name (vec_dest, new_stmt);
4456 gimple_call_set_lhs (new_stmt, new_temp);
4457 }
4458 else
4459 {
4460 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4461 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4462 new_temp = make_ssa_name (vec_dest, new_stmt);
4463 gimple_assign_set_lhs (new_stmt, new_temp);
4464 }
4465
4466 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4467 if (slp_node)
9771b263 4468 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4469 else
4470 {
4471 if (!prev_stmt_info)
4472 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4473 else
4474 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4475 prev_stmt_info = vinfo_for_stmt (new_stmt);
4476 }
4a00c761 4477 }
ebfd146a
IR
4478 }
4479 break;
4480
4481 case WIDEN:
4482 /* In case the vectorization factor (VF) is bigger than the number
4483 of elements that we can fit in a vectype (nunits), we have to
 4484 generate more than one vector stmt - i.e., we need to "unroll"
4485 the vector stmt by a factor VF/nunits. */
4486 for (j = 0; j < ncopies; j++)
4487 {
4a00c761 4488 /* Handle uses. */
ebfd146a 4489 if (j == 0)
4a00c761
JJ
4490 {
4491 if (slp_node)
4492 {
4493 if (code == WIDEN_LSHIFT_EXPR)
4494 {
4495 unsigned int k;
ebfd146a 4496
4a00c761
JJ
4497 vec_oprnd1 = op1;
4498 /* Store vec_oprnd1 for every vector stmt to be created
4499 for SLP_NODE. We check during the analysis that all
4500 the shift arguments are the same. */
4501 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4502 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4503
4504 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4505 slp_node);
4a00c761
JJ
4506 }
4507 else
4508 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4509 &vec_oprnds1, slp_node);
4a00c761
JJ
4510 }
4511 else
4512 {
81c40241 4513 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4514 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4515 if (op_type == binary_op)
4516 {
4517 if (code == WIDEN_LSHIFT_EXPR)
4518 vec_oprnd1 = op1;
4519 else
81c40241 4520 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4521 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4522 }
4523 }
4524 }
ebfd146a 4525 else
4a00c761
JJ
4526 {
4527 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4528 vec_oprnds0.truncate (0);
4529 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4530 if (op_type == binary_op)
4531 {
4532 if (code == WIDEN_LSHIFT_EXPR)
4533 vec_oprnd1 = op1;
4534 else
4535 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4536 vec_oprnd1);
9771b263
DN
4537 vec_oprnds1.truncate (0);
4538 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4539 }
4540 }
ebfd146a 4541
4a00c761
JJ
4542 /* Arguments are ready. Create the new vector stmts. */
4543 for (i = multi_step_cvt; i >= 0; i--)
4544 {
9771b263 4545 tree this_dest = vec_dsts[i];
4a00c761
JJ
4546 enum tree_code c1 = code1, c2 = code2;
4547 if (i == 0 && codecvt2 != ERROR_MARK)
4548 {
4549 c1 = codecvt1;
4550 c2 = codecvt2;
4551 }
4552 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4553 &vec_oprnds1,
4554 stmt, this_dest, gsi,
4555 c1, c2, decl1, decl2,
4556 op_type);
4557 }
4558
9771b263 4559 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4560 {
4561 if (cvt_type)
4562 {
4563 if (codecvt1 == CALL_EXPR)
4564 {
4565 new_stmt = gimple_build_call (decl1, 1, vop0);
4566 new_temp = make_ssa_name (vec_dest, new_stmt);
4567 gimple_call_set_lhs (new_stmt, new_temp);
4568 }
4569 else
4570 {
4571 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4572 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4573 new_stmt = gimple_build_assign (new_temp, codecvt1,
4574 vop0);
4a00c761
JJ
4575 }
4576
4577 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4578 }
4579 else
4580 new_stmt = SSA_NAME_DEF_STMT (vop0);
4581
4582 if (slp_node)
9771b263 4583 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4584 else
c689ce1e
RB
4585 {
4586 if (!prev_stmt_info)
4587 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4588 else
4589 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4590 prev_stmt_info = vinfo_for_stmt (new_stmt);
4591 }
4a00c761 4592 }
ebfd146a 4593 }
4a00c761
JJ
4594
4595 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4596 break;
4597
4598 case NARROW:
4599 /* In case the vectorization factor (VF) is bigger than the number
4600 of elements that we can fit in a vectype (nunits), we have to
 4601 generate more than one vector stmt - i.e., we need to "unroll"
4602 the vector stmt by a factor VF/nunits. */
4603 for (j = 0; j < ncopies; j++)
4604 {
4605 /* Handle uses. */
4a00c761
JJ
4606 if (slp_node)
4607 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4608 slp_node);
ebfd146a
IR
4609 else
4610 {
9771b263 4611 vec_oprnds0.truncate (0);
4a00c761
JJ
4612 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4613 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4614 }
4615
4a00c761
JJ
4616 /* Arguments are ready. Create the new vector stmts. */
4617 if (cvt_type)
9771b263 4618 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4619 {
4620 if (codecvt1 == CALL_EXPR)
4621 {
4622 new_stmt = gimple_build_call (decl1, 1, vop0);
4623 new_temp = make_ssa_name (vec_dest, new_stmt);
4624 gimple_call_set_lhs (new_stmt, new_temp);
4625 }
4626 else
4627 {
4628 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4629 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4630 new_stmt = gimple_build_assign (new_temp, codecvt1,
4631 vop0);
4a00c761 4632 }
ebfd146a 4633
4a00c761 4634 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4635 vec_oprnds0[i] = new_temp;
4a00c761 4636 }
ebfd146a 4637
4a00c761
JJ
4638 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4639 stmt, vec_dsts, gsi,
4640 slp_node, code1,
4641 &prev_stmt_info);
ebfd146a
IR
4642 }
4643
4644 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4645 break;
ebfd146a
IR
4646 }
4647
9771b263
DN
4648 vec_oprnds0.release ();
4649 vec_oprnds1.release ();
9771b263 4650 interm_types.release ();
ebfd146a
IR
4651
4652 return true;
4653}
ff802fa1
IR
4654
4655
ebfd146a
IR
4656/* Function vectorizable_assignment.
4657
b8698a0f
L
4658 Check if STMT performs an assignment (copy) that can be vectorized.
4659 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 4660 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4661 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4662
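/* For example, a plain copy 'a_1 = b_2;', a PAREN_EXPR, or a NOP_EXPR /
   VIEW_CONVERT_EXPR that changes neither the number of vector elements nor
   the vector size (say int <-> unsigned int) is vectorized here as a single
   vector copy, with the source wrapped in a VIEW_CONVERT_EXPR to the
   destination vector type when a conversion code is involved.  */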
4663static bool
355fe088
TS
4664vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4665 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4666{
4667 tree vec_dest;
4668 tree scalar_dest;
4669 tree op;
4670 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
4671 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4672 tree new_temp;
355fe088 4673 gimple *def_stmt;
4fc5ebf1
JG
4674 enum vect_def_type dt[1] = {vect_unknown_def_type};
4675 int ndts = 1;
ebfd146a 4676 int ncopies;
f18b55bd 4677 int i, j;
6e1aa848 4678 vec<tree> vec_oprnds = vNULL;
ebfd146a 4679 tree vop;
a70d6342 4680 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4681 vec_info *vinfo = stmt_info->vinfo;
355fe088 4682 gimple *new_stmt = NULL;
f18b55bd 4683 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4684 enum tree_code code;
4685 tree vectype_in;
ebfd146a 4686
a70d6342 4687 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4688 return false;
4689
66c16fd9
RB
4690 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4691 && ! vec_stmt)
ebfd146a
IR
4692 return false;
4693
4694 /* Is vectorizable assignment? */
4695 if (!is_gimple_assign (stmt))
4696 return false;
4697
4698 scalar_dest = gimple_assign_lhs (stmt);
4699 if (TREE_CODE (scalar_dest) != SSA_NAME)
4700 return false;
4701
fde9c428 4702 code = gimple_assign_rhs_code (stmt);
ebfd146a 4703 if (gimple_assign_single_p (stmt)
fde9c428
RG
4704 || code == PAREN_EXPR
4705 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4706 op = gimple_assign_rhs1 (stmt);
4707 else
4708 return false;
4709
7b7ec6c5
RG
4710 if (code == VIEW_CONVERT_EXPR)
4711 op = TREE_OPERAND (op, 0);
4712
465c8c19
JJ
4713 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4714 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4715
4716 /* Multiple types in SLP are handled by creating the appropriate number of
4717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4718 case of SLP. */
fce57248 4719 if (slp_node)
465c8c19
JJ
4720 ncopies = 1;
4721 else
e8f142e2 4722 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
4723
4724 gcc_assert (ncopies >= 1);
4725
81c40241 4726 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4727 {
73fbfcad 4728 if (dump_enabled_p ())
78c60e3d 4729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4730 "use not simple.\n");
ebfd146a
IR
4731 return false;
4732 }
4733
fde9c428
RG
4734 /* We can handle NOP_EXPR conversions that do not change the number
4735 of elements or the vector size. */
7b7ec6c5
RG
4736 if ((CONVERT_EXPR_CODE_P (code)
4737 || code == VIEW_CONVERT_EXPR)
fde9c428
RG
4738 && (!vectype_in
4739 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4740 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4741 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4742 return false;
4743
7b7b1813
RG
4744 /* We do not handle bit-precision changes. */
4745 if ((CONVERT_EXPR_CODE_P (code)
4746 || code == VIEW_CONVERT_EXPR)
4747 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
4748 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4749 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
4750 /* But a conversion that does not change the bit-pattern is ok. */
4751 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4752 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
4753 && TYPE_UNSIGNED (TREE_TYPE (op)))
4754 /* Conversion between boolean types of different sizes is
4755 a simple assignment in case their vectypes are the same
4756 boolean vectors. */
4757 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4758 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4759 {
73fbfcad 4760 if (dump_enabled_p ())
78c60e3d
SS
4761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4762 "type conversion to/from bit-precision "
e645e942 4763 "unsupported.\n");
7b7b1813
RG
4764 return false;
4765 }
4766
ebfd146a
IR
4767 if (!vec_stmt) /* transformation not required. */
4768 {
4769 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4770 if (dump_enabled_p ())
78c60e3d 4771 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4772 "=== vectorizable_assignment ===\n");
4fc5ebf1 4773 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
4774 return true;
4775 }
4776
67b8dbac 4777 /* Transform. */
73fbfcad 4778 if (dump_enabled_p ())
e645e942 4779 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4780
4781 /* Handle def. */
4782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4783
4784 /* Handle use. */
f18b55bd 4785 for (j = 0; j < ncopies; j++)
ebfd146a 4786 {
f18b55bd
IR
4787 /* Handle uses. */
4788 if (j == 0)
306b0c92 4789 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
4790 else
4791 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4792
4793 /* Arguments are ready. create the new vector stmt. */
9771b263 4794 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4795 {
7b7ec6c5
RG
4796 if (CONVERT_EXPR_CODE_P (code)
4797 || code == VIEW_CONVERT_EXPR)
4a73490d 4798 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4799 new_stmt = gimple_build_assign (vec_dest, vop);
4800 new_temp = make_ssa_name (vec_dest, new_stmt);
4801 gimple_assign_set_lhs (new_stmt, new_temp);
4802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4803 if (slp_node)
9771b263 4804 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4805 }
ebfd146a
IR
4806
4807 if (slp_node)
f18b55bd
IR
4808 continue;
4809
4810 if (j == 0)
4811 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4812 else
4813 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4814
4815 prev_stmt_info = vinfo_for_stmt (new_stmt);
4816 }
b8698a0f 4817
9771b263 4818 vec_oprnds.release ();
ebfd146a
IR
4819 return true;
4820}
4821
9dc3f7de 4822
1107f3ae
IR
4823/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4824 either as shift by a scalar or by a vector. */
4825
4826bool
4827vect_supportable_shift (enum tree_code code, tree scalar_type)
4828{
4829
ef4bddc2 4830 machine_mode vec_mode;
1107f3ae
IR
4831 optab optab;
4832 int icode;
4833 tree vectype;
4834
4835 vectype = get_vectype_for_scalar_type (scalar_type);
4836 if (!vectype)
4837 return false;
4838
4839 optab = optab_for_tree_code (code, vectype, optab_scalar);
4840 if (!optab
4841 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4842 {
4843 optab = optab_for_tree_code (code, vectype, optab_vector);
4844 if (!optab
4845 || (optab_handler (optab, TYPE_MODE (vectype))
4846 == CODE_FOR_nothing))
4847 return false;
4848 }
4849
4850 vec_mode = TYPE_MODE (vectype);
4851 icode = (int) optab_handler (optab, vec_mode);
4852 if (icode == CODE_FOR_nothing)
4853 return false;
4854
4855 return true;
4856}
4857
4858
9dc3f7de
IR
4859/* Function vectorizable_shift.
4860
4861 Check if STMT performs a shift operation that can be vectorized.
4862 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4863 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4864 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4865
4866static bool
355fe088
TS
4867vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4868 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
4869{
4870 tree vec_dest;
4871 tree scalar_dest;
4872 tree op0, op1 = NULL;
4873 tree vec_oprnd1 = NULL_TREE;
4874 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4875 tree vectype;
4876 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4877 enum tree_code code;
ef4bddc2 4878 machine_mode vec_mode;
9dc3f7de
IR
4879 tree new_temp;
4880 optab optab;
4881 int icode;
ef4bddc2 4882 machine_mode optab_op2_mode;
355fe088 4883 gimple *def_stmt;
9dc3f7de 4884 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4885 int ndts = 2;
355fe088 4886 gimple *new_stmt = NULL;
9dc3f7de
IR
4887 stmt_vec_info prev_stmt_info;
4888 int nunits_in;
4889 int nunits_out;
4890 tree vectype_out;
cede2577 4891 tree op1_vectype;
9dc3f7de
IR
4892 int ncopies;
4893 int j, i;
6e1aa848
DN
4894 vec<tree> vec_oprnds0 = vNULL;
4895 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4896 tree vop0, vop1;
4897 unsigned int k;
49eab32e 4898 bool scalar_shift_arg = true;
9dc3f7de 4899 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4900 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
4901
4902 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4903 return false;
4904
66c16fd9
RB
4905 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4906 && ! vec_stmt)
9dc3f7de
IR
4907 return false;
4908
4909 /* Is STMT a vectorizable binary/unary operation? */
4910 if (!is_gimple_assign (stmt))
4911 return false;
4912
4913 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4914 return false;
4915
4916 code = gimple_assign_rhs_code (stmt);
4917
4918 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4919 || code == RROTATE_EXPR))
4920 return false;
4921
4922 scalar_dest = gimple_assign_lhs (stmt);
4923 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 4924 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 4925 {
73fbfcad 4926 if (dump_enabled_p ())
78c60e3d 4927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4928 "bit-precision shifts not supported.\n");
7b7b1813
RG
4929 return false;
4930 }
9dc3f7de
IR
4931
4932 op0 = gimple_assign_rhs1 (stmt);
81c40241 4933 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4934 {
73fbfcad 4935 if (dump_enabled_p ())
78c60e3d 4936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4937 "use not simple.\n");
9dc3f7de
IR
4938 return false;
4939 }
4940 /* If op0 is an external or constant def use a vector type with
4941 the same size as the output vector type. */
4942 if (!vectype)
4943 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4944 if (vec_stmt)
4945 gcc_assert (vectype);
4946 if (!vectype)
4947 {
73fbfcad 4948 if (dump_enabled_p ())
78c60e3d 4949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4950 "no vectype for scalar type\n");
9dc3f7de
IR
4951 return false;
4952 }
4953
4954 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4955 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4956 if (nunits_out != nunits_in)
4957 return false;
4958
4959 op1 = gimple_assign_rhs2 (stmt);
81c40241 4960 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 4961 {
73fbfcad 4962 if (dump_enabled_p ())
78c60e3d 4963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4964 "use not simple.\n");
9dc3f7de
IR
4965 return false;
4966 }
4967
9dc3f7de
IR
4968 /* Multiple types in SLP are handled by creating the appropriate number of
4969 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4970 case of SLP. */
fce57248 4971 if (slp_node)
9dc3f7de
IR
4972 ncopies = 1;
4973 else
e8f142e2 4974 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
4975
4976 gcc_assert (ncopies >= 1);
4977
4978 /* Determine whether the shift amount is a vector, or scalar. If the
4979 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4980
dbfa87aa
YR
4981 if ((dt[1] == vect_internal_def
4982 || dt[1] == vect_induction_def)
4983 && !slp_node)
49eab32e
JJ
4984 scalar_shift_arg = false;
4985 else if (dt[1] == vect_constant_def
4986 || dt[1] == vect_external_def
4987 || dt[1] == vect_internal_def)
4988 {
4989 /* In SLP, we need to check whether the shift count is the same
4990 for all stmts; in loops, a constant or invariant count is
4991 always a scalar shift. */
4992 if (slp_node)
4993 {
355fe088
TS
4994 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4995 gimple *slpstmt;
49eab32e 4996
9771b263 4997 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
4998 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4999 scalar_shift_arg = false;
5000 }
60d393e8
RB
5001
5002 /* If the shift amount is computed by a pattern stmt we cannot
5003 use the scalar amount directly thus give up and use a vector
5004 shift. */
5005 if (dt[1] == vect_internal_def)
5006 {
5007 gimple *def = SSA_NAME_DEF_STMT (op1);
5008 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5009 scalar_shift_arg = false;
5010 }
49eab32e
JJ
5011 }
5012 else
5013 {
73fbfcad 5014 if (dump_enabled_p ())
78c60e3d 5015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5016 "operand mode requires invariant argument.\n");
49eab32e
JJ
5017 return false;
5018 }
5019
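 /* For illustration: in a loop like "a[i] = b[i] << c[i]" the shift
    amount varies per element, so SCALAR_SHIFT_ARG is false and the
    vector/vector optab is required; in "a[i] = b[i] << n" with a
    loop-invariant n the amount stays scalar and the usually cheaper
    vector/scalar optab can be tried first, as done below.  */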
9dc3f7de 5020 /* Vector shifted by vector. */
49eab32e 5021 if (!scalar_shift_arg)
9dc3f7de
IR
5022 {
5023 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5024 if (dump_enabled_p ())
78c60e3d 5025 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5026 "vector/vector shift/rotate found.\n");
78c60e3d 5027
aa948027
JJ
5028 if (!op1_vectype)
5029 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5030 if (op1_vectype == NULL_TREE
5031 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5032 {
73fbfcad 5033 if (dump_enabled_p ())
78c60e3d
SS
5034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5035 "unusable type for last operand in"
e645e942 5036 " vector/vector shift/rotate.\n");
cede2577
JJ
5037 return false;
5038 }
9dc3f7de
IR
5039 }
5040 /* See if the machine has a vector shifted by scalar insn and if not
5041 then see if it has a vector shifted by vector insn. */
49eab32e 5042 else
9dc3f7de
IR
5043 {
5044 optab = optab_for_tree_code (code, vectype, optab_scalar);
5045 if (optab
5046 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5047 {
73fbfcad 5048 if (dump_enabled_p ())
78c60e3d 5049 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5050 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5051 }
5052 else
5053 {
5054 optab = optab_for_tree_code (code, vectype, optab_vector);
5055 if (optab
5056 && (optab_handler (optab, TYPE_MODE (vectype))
5057 != CODE_FOR_nothing))
5058 {
49eab32e
JJ
5059 scalar_shift_arg = false;
5060
73fbfcad 5061 if (dump_enabled_p ())
78c60e3d 5062 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5063 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5064
5065 /* Unlike the other binary operators, shifts/rotates have
5066 an int rhs rather than one of the same type as the lhs,
5067 so make sure the scalar is of the right type if we are
aa948027 5068 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5069 if (dt[1] == vect_constant_def)
5070 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5071 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5072 TREE_TYPE (op1)))
5073 {
5074 if (slp_node
5075 && TYPE_MODE (TREE_TYPE (vectype))
5076 != TYPE_MODE (TREE_TYPE (op1)))
5077 {
73fbfcad 5078 if (dump_enabled_p ())
78c60e3d
SS
5079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5080 "unusable type for last operand in"
e645e942 5081 " vector/vector shift/rotate.\n");
21c0a521 5082 return false;
aa948027
JJ
5083 }
5084 if (vec_stmt && !slp_node)
5085 {
5086 op1 = fold_convert (TREE_TYPE (vectype), op1);
5087 op1 = vect_init_vector (stmt, op1,
5088 TREE_TYPE (vectype), NULL);
5089 }
5090 }
9dc3f7de
IR
5091 }
5092 }
5093 }
9dc3f7de
IR
5094
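 /* For illustration (a hypothetical example): for "long long a[i] <<= n"
    the scalar amount n has type int; if the vector/vector form is
    chosen, the amount is first converted to long long above so that it
    can be broadcast into an operand with the same element type as the
    shifted vector.  */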
5095 /* Supportable by target? */
5096 if (!optab)
5097 {
73fbfcad 5098 if (dump_enabled_p ())
78c60e3d 5099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5100 "no optab.\n");
9dc3f7de
IR
5101 return false;
5102 }
5103 vec_mode = TYPE_MODE (vectype);
5104 icode = (int) optab_handler (optab, vec_mode);
5105 if (icode == CODE_FOR_nothing)
5106 {
73fbfcad 5107 if (dump_enabled_p ())
78c60e3d 5108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5109 "op not supported by target.\n");
9dc3f7de
IR
5110 /* Check only during analysis. */
5111 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb
RS
5112 || (!vec_stmt
5113 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5114 return false;
73fbfcad 5115 if (dump_enabled_p ())
e645e942
TJ
5116 dump_printf_loc (MSG_NOTE, vect_location,
5117 "proceeding using word mode.\n");
9dc3f7de
IR
5118 }
5119
5120 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5121 if (!vec_stmt
5122 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5123 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5124 {
73fbfcad 5125 if (dump_enabled_p ())
78c60e3d 5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5127 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5128 return false;
5129 }
5130
5131 if (!vec_stmt) /* transformation not required. */
5132 {
5133 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5134 if (dump_enabled_p ())
e645e942
TJ
5135 dump_printf_loc (MSG_NOTE, vect_location,
5136 "=== vectorizable_shift ===\n");
4fc5ebf1 5137 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5138 return true;
5139 }
5140
67b8dbac 5141 /* Transform. */
9dc3f7de 5142
73fbfcad 5143 if (dump_enabled_p ())
78c60e3d 5144 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5145 "transform binary/unary operation.\n");
9dc3f7de
IR
5146
5147 /* Handle def. */
5148 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5149
9dc3f7de
IR
5150 prev_stmt_info = NULL;
5151 for (j = 0; j < ncopies; j++)
5152 {
5153 /* Handle uses. */
5154 if (j == 0)
5155 {
5156 if (scalar_shift_arg)
5157 {
5158 /* Vector shl and shr insn patterns can be defined with scalar
5159 operand 2 (shift operand). In this case, use constant or loop
5160 invariant op1 directly, without extending it to vector mode
5161 first. */
5162 optab_op2_mode = insn_data[icode].operand[2].mode;
5163 if (!VECTOR_MODE_P (optab_op2_mode))
5164 {
73fbfcad 5165 if (dump_enabled_p ())
78c60e3d 5166 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5167 "operand 1 using scalar mode.\n");
9dc3f7de 5168 vec_oprnd1 = op1;
8930f723 5169 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5170 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5171 if (slp_node)
5172 {
5173 /* Store vec_oprnd1 for every vector stmt to be created
5174 for SLP_NODE. We check during the analysis that all
5175 the shift arguments are the same.
5176 TODO: Allow different constants for different vector
5177 stmts generated for an SLP instance. */
5178 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5179 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5180 }
5181 }
5182 }
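 /* For illustration: with a constant amount, e.g. "a[i] = b[i] << 3",
    the block above pushes the scalar 3 directly into VEC_OPRNDS1
    (duplicating it once per SLP vector stmt) instead of broadcasting
    it into a vector operand.  */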
5183
5184 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5185 (a special case for certain kinds of vector shifts); otherwise,
5186 operand 1 should be of a vector type (the usual case). */
5187 if (vec_oprnd1)
5188 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5189 slp_node);
9dc3f7de
IR
5190 else
5191 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5192 slp_node);
9dc3f7de
IR
5193 }
5194 else
5195 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5196
5197 /* Arguments are ready. Create the new vector stmt. */
9771b263 5198 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5199 {
9771b263 5200 vop1 = vec_oprnds1[i];
0d0e4a03 5201 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5202 new_temp = make_ssa_name (vec_dest, new_stmt);
5203 gimple_assign_set_lhs (new_stmt, new_temp);
5204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5205 if (slp_node)
9771b263 5206 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5207 }
5208
5209 if (slp_node)
5210 continue;
5211
5212 if (j == 0)
5213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5214 else
5215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5216 prev_stmt_info = vinfo_for_stmt (new_stmt);
5217 }
5218
9771b263
DN
5219 vec_oprnds0.release ();
5220 vec_oprnds1.release ();
9dc3f7de
IR
5221
5222 return true;
5223}
5224
5225
ebfd146a
IR
5226/* Function vectorizable_operation.
5227
16949072
RG
5228 Check if STMT performs a binary, unary or ternary operation that can
5229 be vectorized.
b8698a0f 5230 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5231 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5232 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5233
5234static bool
355fe088
TS
5235vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5236 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5237{
00f07b86 5238 tree vec_dest;
ebfd146a 5239 tree scalar_dest;
16949072 5240 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5242 tree vectype;
ebfd146a 5243 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5244 enum tree_code code, orig_code;
ef4bddc2 5245 machine_mode vec_mode;
ebfd146a
IR
5246 tree new_temp;
5247 int op_type;
00f07b86 5248 optab optab;
523ba738 5249 bool target_support_p;
355fe088 5250 gimple *def_stmt;
16949072
RG
5251 enum vect_def_type dt[3]
5252 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5253 int ndts = 3;
355fe088 5254 gimple *new_stmt = NULL;
ebfd146a 5255 stmt_vec_info prev_stmt_info;
b690cc0f 5256 int nunits_in;
ebfd146a
IR
5257 int nunits_out;
5258 tree vectype_out;
5259 int ncopies;
5260 int j, i;
6e1aa848
DN
5261 vec<tree> vec_oprnds0 = vNULL;
5262 vec<tree> vec_oprnds1 = vNULL;
5263 vec<tree> vec_oprnds2 = vNULL;
16949072 5264 tree vop0, vop1, vop2;
a70d6342 5265 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5266 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5267
a70d6342 5268 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5269 return false;
5270
66c16fd9
RB
5271 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5272 && ! vec_stmt)
ebfd146a
IR
5273 return false;
5274
5275 /* Is STMT a vectorizable binary/unary operation? */
5276 if (!is_gimple_assign (stmt))
5277 return false;
5278
5279 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5280 return false;
5281
0eb952ea 5282 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5283
1af4ebf5
MG
5284 /* For pointer addition and subtraction, we should use the normal
5285 plus and minus for the vector operation. */
ebfd146a
IR
5286 if (code == POINTER_PLUS_EXPR)
5287 code = PLUS_EXPR;
1af4ebf5
MG
5288 if (code == POINTER_DIFF_EXPR)
5289 code = MINUS_EXPR;
ebfd146a
IR
5290
5291 /* Support only unary or binary operations. */
5292 op_type = TREE_CODE_LENGTH (code);
16949072 5293 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5294 {
73fbfcad 5295 if (dump_enabled_p ())
78c60e3d 5296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5297 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5298 op_type);
ebfd146a
IR
5299 return false;
5300 }
5301
b690cc0f
RG
5302 scalar_dest = gimple_assign_lhs (stmt);
5303 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5304
7b7b1813
RG
5305 /* Most operations cannot handle bit-precision types without extra
5306 truncations. */
045c1278 5307 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5308 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5309 /* Exception are bitwise binary operations. */
5310 && code != BIT_IOR_EXPR
5311 && code != BIT_XOR_EXPR
5312 && code != BIT_AND_EXPR)
5313 {
73fbfcad 5314 if (dump_enabled_p ())
78c60e3d 5315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5316 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5317 return false;
5318 }
5319
ebfd146a 5320 op0 = gimple_assign_rhs1 (stmt);
81c40241 5321 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5322 {
73fbfcad 5323 if (dump_enabled_p ())
78c60e3d 5324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5325 "use not simple.\n");
ebfd146a
IR
5326 return false;
5327 }
b690cc0f
RG
5328 /* If op0 is an external or constant def use a vector type with
5329 the same size as the output vector type. */
5330 if (!vectype)
b036c6c5
IE
5331 {
5332 /* For boolean type we cannot determine vectype by
5333 invariant value (don't know whether it is a vector
5334 of booleans or vector of integers). We use output
5335 vectype because operations on boolean don't change
5336 type. */
2568d8a1 5337 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5338 {
2568d8a1 5339 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5340 {
5341 if (dump_enabled_p ())
5342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5343 "not supported operation on bool value.\n");
5344 return false;
5345 }
5346 vectype = vectype_out;
5347 }
5348 else
5349 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5350 }
7d8930a0
IR
5351 if (vec_stmt)
5352 gcc_assert (vectype);
5353 if (!vectype)
5354 {
73fbfcad 5355 if (dump_enabled_p ())
7d8930a0 5356 {
78c60e3d
SS
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358 "no vectype for scalar type ");
5359 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5360 TREE_TYPE (op0));
e645e942 5361 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5362 }
5363
5364 return false;
5365 }
b690cc0f
RG
5366
5367 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5368 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5369 if (nunits_out != nunits_in)
5370 return false;
ebfd146a 5371
16949072 5372 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5373 {
5374 op1 = gimple_assign_rhs2 (stmt);
81c40241 5375 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5376 {
73fbfcad 5377 if (dump_enabled_p ())
78c60e3d 5378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5379 "use not simple.\n");
ebfd146a
IR
5380 return false;
5381 }
5382 }
16949072
RG
5383 if (op_type == ternary_op)
5384 {
5385 op2 = gimple_assign_rhs3 (stmt);
81c40241 5386 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5387 {
73fbfcad 5388 if (dump_enabled_p ())
78c60e3d 5389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5390 "use not simple.\n");
16949072
RG
5391 return false;
5392 }
5393 }
ebfd146a 5394
b690cc0f 5395 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5396 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5397 case of SLP. */
fce57248 5398 if (slp_node)
b690cc0f
RG
5399 ncopies = 1;
5400 else
e8f142e2 5401 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5402
5403 gcc_assert (ncopies >= 1);
5404
9dc3f7de 5405 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5406 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5407 || code == RROTATE_EXPR)
9dc3f7de 5408 return false;
ebfd146a 5409
ebfd146a 5410 /* Supportable by target? */
00f07b86
RH
5411
5412 vec_mode = TYPE_MODE (vectype);
5413 if (code == MULT_HIGHPART_EXPR)
523ba738 5414 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5415 else
5416 {
5417 optab = optab_for_tree_code (code, vectype, optab_default);
5418 if (!optab)
5deb57cb 5419 {
73fbfcad 5420 if (dump_enabled_p ())
78c60e3d 5421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5422 "no optab.\n");
00f07b86 5423 return false;
5deb57cb 5424 }
523ba738
RS
5425 target_support_p = (optab_handler (optab, vec_mode)
5426 != CODE_FOR_nothing);
5deb57cb
JJ
5427 }
5428
523ba738 5429 if (!target_support_p)
ebfd146a 5430 {
73fbfcad 5431 if (dump_enabled_p ())
78c60e3d 5432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5433 "op not supported by target.\n");
ebfd146a
IR
5434 /* Check only during analysis. */
5435 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
ca09abcb 5436 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5437 return false;
73fbfcad 5438 if (dump_enabled_p ())
e645e942
TJ
5439 dump_printf_loc (MSG_NOTE, vect_location,
5440 "proceeding using word mode.\n");
383d9c83
IR
5441 }
5442
4a00c761 5443 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5444 if (!VECTOR_MODE_P (vec_mode)
5445 && !vec_stmt
ca09abcb 5446 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5447 {
73fbfcad 5448 if (dump_enabled_p ())
78c60e3d 5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5450 "not worthwhile without SIMD support.\n");
e34842c6 5451 return false;
7d8930a0 5452 }
ebfd146a 5453
ebfd146a
IR
5454 if (!vec_stmt) /* transformation not required. */
5455 {
4a00c761 5456 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5457 if (dump_enabled_p ())
78c60e3d 5458 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5459 "=== vectorizable_operation ===\n");
4fc5ebf1 5460 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5461 return true;
5462 }
5463
67b8dbac 5464 /* Transform. */
ebfd146a 5465
73fbfcad 5466 if (dump_enabled_p ())
78c60e3d 5467 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5468 "transform binary/unary operation.\n");
383d9c83 5469
ebfd146a 5470 /* Handle def. */
00f07b86 5471 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5472
0eb952ea
JJ
5473 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5474 vectors with unsigned elements, but the result is signed. So, we
5475 need to compute the MINUS_EXPR into vectype temporary and
5476 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5477 tree vec_cvt_dest = NULL_TREE;
5478 if (orig_code == POINTER_DIFF_EXPR)
5479 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5480
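 /* For illustration: for "ptrdiff_t d = p - q" the pointer operands are
    vectorized as vectors of unsigned pointer-sized integers, so the
    MINUS_EXPR is computed in that unsigned vectype and the result is
    then VIEW_CONVERT_EXPRed to the signed vectype_out of d, as the
    comment above describes.  */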
ebfd146a
IR
5481 /* In case the vectorization factor (VF) is bigger than the number
5482 of elements that we can fit in a vectype (nunits), we have to generate
5483 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
5484 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5485 from one copy of the vector stmt to the next, in the field
5486 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5487 stages to find the correct vector defs to be used when vectorizing
5488 stmts that use the defs of the current stmt. The example below
5489 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5490 we need to create 4 vectorized stmts):
5491
5492 before vectorization:
5493 RELATED_STMT VEC_STMT
5494 S1: x = memref - -
5495 S2: z = x + 1 - -
5496
5497 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5498 there):
5499 RELATED_STMT VEC_STMT
5500 VS1_0: vx0 = memref0 VS1_1 -
5501 VS1_1: vx1 = memref1 VS1_2 -
5502 VS1_2: vx2 = memref2 VS1_3 -
5503 VS1_3: vx3 = memref3 - -
5504 S1: x = load - VS1_0
5505 S2: z = x + 1 - -
5506
5507 step2: vectorize stmt S2 (done here):
5508 To vectorize stmt S2 we first need to find the relevant vector
5509 def for the first operand 'x'. This is, as usual, obtained from
5510 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5511 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5512 relevant vector def 'vx0'. Having found 'vx0' we can generate
5513 the vector stmt VS2_0, and as usual, record it in the
5514 STMT_VINFO_VEC_STMT of stmt S2.
5515 When creating the second copy (VS2_1), we obtain the relevant vector
5516 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5517 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5518 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5519 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5520 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5521 chain of stmts and pointers:
5522 RELATED_STMT VEC_STMT
5523 VS1_0: vx0 = memref0 VS1_1 -
5524 VS1_1: vx1 = memref1 VS1_2 -
5525 VS1_2: vx2 = memref2 VS1_3 -
5526 VS1_3: vx3 = memref3 - -
5527 S1: x = load - VS1_0
5528 VS2_0: vz0 = vx0 + v1 VS2_1 -
5529 VS2_1: vz1 = vx1 + v1 VS2_2 -
5530 VS2_2: vz2 = vx2 + v1 VS2_3 -
5531 VS2_3: vz3 = vx3 + v1 - -
5532 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5533
5534 prev_stmt_info = NULL;
5535 for (j = 0; j < ncopies; j++)
5536 {
5537 /* Handle uses. */
5538 if (j == 0)
4a00c761
JJ
5539 {
5540 if (op_type == binary_op || op_type == ternary_op)
5541 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5542 slp_node);
4a00c761
JJ
5543 else
5544 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5545 slp_node);
4a00c761 5546 if (op_type == ternary_op)
c392943c 5547 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5548 slp_node);
4a00c761 5549 }
ebfd146a 5550 else
4a00c761
JJ
5551 {
5552 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5553 if (op_type == ternary_op)
5554 {
9771b263
DN
5555 tree vec_oprnd = vec_oprnds2.pop ();
5556 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5557 vec_oprnd));
4a00c761
JJ
5558 }
5559 }
5560
5561 /* Arguments are ready. Create the new vector stmt. */
9771b263 5562 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5563 {
4a00c761 5564 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5565 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5566 vop2 = ((op_type == ternary_op)
9771b263 5567 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5568 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5569 new_temp = make_ssa_name (vec_dest, new_stmt);
5570 gimple_assign_set_lhs (new_stmt, new_temp);
5571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5572 if (vec_cvt_dest)
5573 {
5574 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5575 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5576 new_temp);
5577 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5578 gimple_assign_set_lhs (new_stmt, new_temp);
5579 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5580 }
4a00c761 5581 if (slp_node)
9771b263 5582 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5583 }
5584
4a00c761
JJ
5585 if (slp_node)
5586 continue;
5587
5588 if (j == 0)
5589 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5590 else
5591 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5592 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5593 }
5594
9771b263
DN
5595 vec_oprnds0.release ();
5596 vec_oprnds1.release ();
5597 vec_oprnds2.release ();
ebfd146a 5598
ebfd146a
IR
5599 return true;
5600}
5601
f702e7d4 5602/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5603
5604static void
f702e7d4 5605ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5606{
5607 if (!dr->aux)
5608 return;
5609
52639a61 5610 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5611 {
52639a61 5612 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5613
f702e7d4
RS
5614 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5615
428f0c67 5616 if (decl_in_symtab_p (base_decl))
f702e7d4 5617 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
5618 else
5619 {
f702e7d4 5620 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
5621 DECL_USER_ALIGN (base_decl) = 1;
5622 }
52639a61 5623 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5624 }
5625}
5626
ebfd146a 5627
44fc7854
BE
5628/* Function get_group_alias_ptr_type.
5629
5630 Return the alias type for the group starting at FIRST_STMT. */
5631
5632static tree
5633get_group_alias_ptr_type (gimple *first_stmt)
5634{
5635 struct data_reference *first_dr, *next_dr;
5636 gimple *next_stmt;
5637
5638 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5639 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5640 while (next_stmt)
5641 {
5642 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5643 if (get_alias_set (DR_REF (first_dr))
5644 != get_alias_set (DR_REF (next_dr)))
5645 {
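 /* Returning ptr_type_node gives the access alias set zero, which
    conservatively conflicts with everything; this is the safe
    fallback when the group members disagree.  */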
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_NOTE, vect_location,
5648 "conflicting alias set types.\n");
5649 return ptr_type_node;
5650 }
5651 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5652 }
5653 return reference_alias_ptr_type (DR_REF (first_dr));
5654}
5655
5656
ebfd146a
IR
5657/* Function vectorizable_store.
5658
b8698a0f
L
5659 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5660 can be vectorized.
5661 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5662 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5663 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5664
5665static bool
355fe088 5666vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5667 slp_tree slp_node)
ebfd146a
IR
5668{
5669 tree scalar_dest;
5670 tree data_ref;
5671 tree op;
5672 tree vec_oprnd = NULL_TREE;
5673 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5674 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5675 tree elem_type;
ebfd146a 5676 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5677 struct loop *loop = NULL;
ef4bddc2 5678 machine_mode vec_mode;
ebfd146a
IR
5679 tree dummy;
5680 enum dr_alignment_support alignment_support_scheme;
355fe088 5681 gimple *def_stmt;
ebfd146a
IR
5682 enum vect_def_type dt;
5683 stmt_vec_info prev_stmt_info = NULL;
5684 tree dataref_ptr = NULL_TREE;
74bf76ed 5685 tree dataref_offset = NULL_TREE;
355fe088 5686 gimple *ptr_incr = NULL;
ebfd146a
IR
5687 int ncopies;
5688 int j;
2de001ee
RS
5689 gimple *next_stmt, *first_stmt;
5690 bool grouped_store;
ebfd146a 5691 unsigned int group_size, i;
6e1aa848
DN
5692 vec<tree> oprnds = vNULL;
5693 vec<tree> result_chain = vNULL;
ebfd146a 5694 bool inv_p;
09dfa495 5695 tree offset = NULL_TREE;
6e1aa848 5696 vec<tree> vec_oprnds = vNULL;
ebfd146a 5697 bool slp = (slp_node != NULL);
ebfd146a 5698 unsigned int vec_num;
a70d6342 5699 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5700 vec_info *vinfo = stmt_info->vinfo;
272c6793 5701 tree aggr_type;
134c85ca 5702 gather_scatter_info gs_info;
3bab6342 5703 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5704 gimple *new_stmt;
d9f21f6a 5705 poly_uint64 vf;
2de001ee 5706 vec_load_store_type vls_type;
44fc7854 5707 tree ref_type;
a70d6342 5708
a70d6342 5709 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5710 return false;
5711
66c16fd9
RB
5712 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5713 && ! vec_stmt)
ebfd146a
IR
5714 return false;
5715
5716 /* Is vectorizable store? */
5717
5718 if (!is_gimple_assign (stmt))
5719 return false;
5720
5721 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5722 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5723 && is_pattern_stmt_p (stmt_info))
5724 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5725 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5726 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5727 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5728 && TREE_CODE (scalar_dest) != COMPONENT_REF
5729 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5730 && TREE_CODE (scalar_dest) != REALPART_EXPR
5731 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5732 return false;
5733
fce57248
RS
5734 /* Cannot have hybrid store SLP -- that would mean storing to the
5735 same location twice. */
5736 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5737
ebfd146a 5738 gcc_assert (gimple_assign_single_p (stmt));
465c8c19 5739
f4d09712 5740 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
465c8c19
JJ
5741 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5742
5743 if (loop_vinfo)
b17dc4d4
RB
5744 {
5745 loop = LOOP_VINFO_LOOP (loop_vinfo);
5746 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5747 }
5748 else
5749 vf = 1;
465c8c19
JJ
5750
5751 /* Multiple types in SLP are handled by creating the appropriate number of
5752 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5753 case of SLP. */
fce57248 5754 if (slp)
465c8c19
JJ
5755 ncopies = 1;
5756 else
e8f142e2 5757 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5758
5759 gcc_assert (ncopies >= 1);
5760
5761 /* FORNOW. This restriction should be relaxed. */
5762 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5763 {
5764 if (dump_enabled_p ())
5765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5766 "multiple types in nested loop.\n");
5767 return false;
5768 }
5769
ebfd146a 5770 op = gimple_assign_rhs1 (stmt);
f4d09712 5771
2f391428 5772 /* In case this is a store from a constant, make sure
11a82e25 5773 native_encode_expr can handle it. */
2f391428 5774 if (CONSTANT_CLASS_P (op) && native_encode_expr (op, NULL, 64) == 0)
11a82e25
RB
5775 return false;
5776
f4d09712 5777 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
ebfd146a 5778 {
73fbfcad 5779 if (dump_enabled_p ())
78c60e3d 5780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5781 "use not simple.\n");
ebfd146a
IR
5782 return false;
5783 }
5784
2de001ee
RS
5785 if (dt == vect_constant_def || dt == vect_external_def)
5786 vls_type = VLS_STORE_INVARIANT;
5787 else
5788 vls_type = VLS_STORE;
5789
f4d09712
KY
5790 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5791 return false;
5792
272c6793 5793 elem_type = TREE_TYPE (vectype);
ebfd146a 5794 vec_mode = TYPE_MODE (vectype);
7b7b1813 5795
ebfd146a
IR
5796 /* FORNOW. In some cases can vectorize even if data-type not supported
5797 (e.g. - array initialization with 0). */
947131ba 5798 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5799 return false;
5800
5801 if (!STMT_VINFO_DATA_REF (stmt_info))
5802 return false;
5803
2de001ee 5804 vect_memory_access_type memory_access_type;
62da9e14 5805 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
2de001ee
RS
5806 &memory_access_type, &gs_info))
5807 return false;
3bab6342 5808
ebfd146a
IR
5809 if (!vec_stmt) /* transformation not required. */
5810 {
2de001ee 5811 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 5812 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5813 /* The SLP costs are calculated during SLP analysis. */
5814 if (!PURE_SLP_STMT (stmt_info))
2de001ee 5815 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
2e8ab70c 5816 NULL, NULL, NULL);
ebfd146a
IR
5817 return true;
5818 }
2de001ee 5819 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 5820
67b8dbac 5821 /* Transform. */
ebfd146a 5822
f702e7d4 5823 ensure_base_align (dr);
c716e67f 5824
2de001ee 5825 if (memory_access_type == VMAT_GATHER_SCATTER)
3bab6342
AT
5826 {
5827 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
134c85ca 5828 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
5829 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5830 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5831 edge pe = loop_preheader_edge (loop);
5832 gimple_seq seq;
5833 basic_block new_bb;
5834 enum { NARROW, NONE, WIDEN } modifier;
134c85ca 5835 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342
AT
5836
5837 if (nunits == (unsigned int) scatter_off_nunits)
5838 modifier = NONE;
5839 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5840 {
3bab6342
AT
5841 modifier = WIDEN;
5842
e3342de4 5843 vec_perm_builder sel (scatter_off_nunits, scatter_off_nunits, 1);
3bab6342 5844 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
908a1a16 5845 sel.quick_push (i | nunits);
3bab6342 5846
e3342de4
RS
5847 vec_perm_indices indices (sel, 1, scatter_off_nunits);
5848 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5849 indices);
3bab6342
AT
5850 gcc_assert (perm_mask != NULL_TREE);
5851 }
5852 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5853 {
3bab6342
AT
5854 modifier = NARROW;
5855
e3342de4 5856 vec_perm_builder sel (nunits, nunits, 1);
3bab6342 5857 for (i = 0; i < (unsigned int) nunits; ++i)
908a1a16 5858 sel.quick_push (i | scatter_off_nunits);
3bab6342 5859
e3342de4
RS
5860 vec_perm_indices indices (sel, 2, nunits);
5861 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
5862 gcc_assert (perm_mask != NULL_TREE);
5863 ncopies *= 2;
5864 }
5865 else
5866 gcc_unreachable ();
5867
134c85ca 5868 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
5869 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5870 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5871 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5872 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5873 scaletype = TREE_VALUE (arglist);
5874
5875 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5876 && TREE_CODE (rettype) == VOID_TYPE);
5877
134c85ca 5878 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
5879 if (!is_gimple_min_invariant (ptr))
5880 {
5881 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5882 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5883 gcc_assert (!new_bb);
5884 }
5885
5886 /* Currently we support only unconditional scatter stores,
5887 so mask should be all ones. */
5888 mask = build_int_cst (masktype, -1);
5889 mask = vect_init_vector (stmt, mask, masktype, NULL);
5890
134c85ca 5891 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
5892
5893 prev_stmt_info = NULL;
5894 for (j = 0; j < ncopies; ++j)
5895 {
5896 if (j == 0)
5897 {
5898 src = vec_oprnd1
81c40241 5899 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
3bab6342 5900 op = vec_oprnd0
134c85ca 5901 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
5902 }
5903 else if (modifier != NONE && (j & 1))
5904 {
5905 if (modifier == WIDEN)
5906 {
5907 src = vec_oprnd1
5908 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5909 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5910 stmt, gsi);
5911 }
5912 else if (modifier == NARROW)
5913 {
5914 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5915 stmt, gsi);
5916 op = vec_oprnd0
134c85ca
RS
5917 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5918 vec_oprnd0);
3bab6342
AT
5919 }
5920 else
5921 gcc_unreachable ();
5922 }
5923 else
5924 {
5925 src = vec_oprnd1
5926 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5927 op = vec_oprnd0
134c85ca
RS
5928 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5929 vec_oprnd0);
3bab6342
AT
5930 }
5931
5932 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5933 {
5934 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5935 == TYPE_VECTOR_SUBPARTS (srctype));
0e22bb5a 5936 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
5937 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5938 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5940 src = var;
5941 }
5942
5943 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5944 {
5945 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5946 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 5947 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
5948 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5949 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5950 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5951 op = var;
5952 }
5953
5954 new_stmt
134c85ca 5955 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
5956
5957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5958
5959 if (prev_stmt_info == NULL)
5960 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5961 else
5962 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5963 prev_stmt_info = vinfo_for_stmt (new_stmt);
5964 }
5965 return true;
5966 }
5967
2de001ee 5968 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
0d0293ac 5969 if (grouped_store)
ebfd146a 5970 {
2de001ee 5971 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 5972 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5973 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5974
e14c1050 5975 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5976
5977 /* FORNOW */
a70d6342 5978 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5979
5980 /* We vectorize all the stmts of the interleaving group when we
5981 reach the last stmt in the group. */
e14c1050
IR
5982 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5983 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5984 && !slp)
5985 {
5986 *vec_stmt = NULL;
5987 return true;
5988 }
5989
5990 if (slp)
4b5caab7 5991 {
0d0293ac 5992 grouped_store = false;
4b5caab7
IR
5993 /* VEC_NUM is the number of vect stmts to be created for this
5994 group. */
5995 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5996 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 5997 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 5998 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5999 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 6000 }
ebfd146a 6001 else
4b5caab7
IR
6002 /* VEC_NUM is the number of vect stmts to be created for this
6003 group. */
ebfd146a 6004 vec_num = group_size;
44fc7854
BE
6005
6006 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6007 }
b8698a0f 6008 else
ebfd146a
IR
6009 {
6010 first_stmt = stmt;
6011 first_dr = dr;
6012 group_size = vec_num = 1;
44fc7854 6013 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a 6014 }
b8698a0f 6015
73fbfcad 6016 if (dump_enabled_p ())
78c60e3d 6017 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6018 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6019
2de001ee
RS
6020 if (memory_access_type == VMAT_ELEMENTWISE
6021 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6022 {
6023 gimple_stmt_iterator incr_gsi;
6024 bool insert_after;
355fe088 6025 gimple *incr;
f2e2a985
MM
6026 tree offvar;
6027 tree ivstep;
6028 tree running_off;
6029 gimple_seq stmts = NULL;
6030 tree stride_base, stride_step, alias_off;
6031 tree vec_oprnd;
f502d50e 6032 unsigned int g;
f2e2a985
MM
6033
6034 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6035
6036 stride_base
6037 = fold_build_pointer_plus
f502d50e 6038 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 6039 size_binop (PLUS_EXPR,
f502d50e 6040 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
44fc7854 6041 convert_to_ptrofftype (DR_INIT (first_dr))));
f502d50e 6042 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
6043
6044 /* For a store with loop-invariant (but other than power-of-2)
6045 stride (i.e. not a grouped access) like so:
6046
6047 for (i = 0; i < n; i += stride)
6048 array[i] = ...;
6049
6050 we generate a new induction variable and new stores from
6051 the components of the (vectorized) rhs:
6052
6053 for (j = 0; ; j += VF*stride)
6054 vectemp = ...;
6055 tmp1 = vectemp[0];
6056 array[j] = tmp1;
6057 tmp2 = vectemp[1];
6058 array[j + stride] = tmp2;
6059 ...
6060 */
6061
cee62fee 6062 unsigned nstores = nunits;
b17dc4d4 6063 unsigned lnel = 1;
cee62fee 6064 tree ltype = elem_type;
04199738 6065 tree lvectype = vectype;
cee62fee
MM
6066 if (slp)
6067 {
b17dc4d4
RB
6068 if (group_size < nunits
6069 && nunits % group_size == 0)
6070 {
6071 nstores = nunits / group_size;
6072 lnel = group_size;
6073 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6074 lvectype = vectype;
6075
6076 /* First check if vec_extract optab doesn't support extraction
6077 of vector elts directly. */
b397965c 6078 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6079 machine_mode vmode;
6080 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6081 || !VECTOR_MODE_P (vmode)
04199738
RB
6082 || (convert_optab_handler (vec_extract_optab,
6083 TYPE_MODE (vectype), vmode)
6084 == CODE_FOR_nothing))
6085 {
6086 /* Try to avoid emitting an extract of vector elements
6087 by performing the extracts using an integer type of the
6088 same size, extracting from a vector of those and then
6089 re-interpreting it as the original vector type if
6090 supported. */
6091 unsigned lsize
6092 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6093 elmode = int_mode_for_size (lsize, 0).require ();
04199738
RB
6094 /* If we can't construct such a vector fall back to
6095 element extracts from the original vector type and
6096 element size stores. */
9da15d40
RS
6097 if (mode_for_vector (elmode,
6098 nunits / group_size).exists (&vmode)
6099 && VECTOR_MODE_P (vmode)
04199738
RB
6100 && (convert_optab_handler (vec_extract_optab,
6101 vmode, elmode)
6102 != CODE_FOR_nothing))
6103 {
6104 nstores = nunits / group_size;
6105 lnel = group_size;
6106 ltype = build_nonstandard_integer_type (lsize, 1);
6107 lvectype = build_vector_type (ltype, nstores);
6108 }
6109 /* Else fall back to vector extraction anyway.
6110 Fewer stores are more important than avoiding spilling
6111 of the vector we extract from. Compared to the
6112 construction case in vectorizable_load no store-forwarding
6113 issue exists here for reasonable archs. */
6114 }
b17dc4d4
RB
6115 }
6116 else if (group_size >= nunits
6117 && group_size % nunits == 0)
6118 {
6119 nstores = 1;
6120 lnel = nunits;
6121 ltype = vectype;
04199738 6122 lvectype = vectype;
b17dc4d4 6123 }
cee62fee
MM
6124 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6125 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6126 }
6127
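 /* For illustration (modes are only an example): when storing groups
    of two 32-bit floats out of a V4SF, the fallback above puns the
    source to a two-element vector of 64-bit integers and extracts one
    64-bit integer per group, so each group needs a single extract
    rather than per-element extracts.  */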
f2e2a985
MM
6128 ivstep = stride_step;
6129 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6130 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6131
6132 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6133
6134 create_iv (stride_base, ivstep, NULL,
6135 loop, &incr_gsi, insert_after,
6136 &offvar, NULL);
6137 incr = gsi_stmt (incr_gsi);
310213d4 6138 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
6139
6140 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6141 if (stmts)
6142 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6143
6144 prev_stmt_info = NULL;
44fc7854 6145 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6146 next_stmt = first_stmt;
6147 for (g = 0; g < group_size; g++)
f2e2a985 6148 {
f502d50e
MM
6149 running_off = offvar;
6150 if (g)
f2e2a985 6151 {
f502d50e
MM
6152 tree size = TYPE_SIZE_UNIT (ltype);
6153 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6154 size);
f502d50e 6155 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6156 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6157 running_off, pos);
f2e2a985 6158 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6159 running_off = newoff;
f502d50e 6160 }
b17dc4d4
RB
6161 unsigned int group_el = 0;
6162 unsigned HOST_WIDE_INT
6163 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6164 for (j = 0; j < ncopies; j++)
6165 {
6166 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6167 and first_stmt == stmt. */
6168 if (j == 0)
6169 {
6170 if (slp)
6171 {
6172 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6173 slp_node);
f502d50e
MM
6174 vec_oprnd = vec_oprnds[0];
6175 }
6176 else
6177 {
6178 gcc_assert (gimple_assign_single_p (next_stmt));
6179 op = gimple_assign_rhs1 (next_stmt);
81c40241 6180 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6181 }
6182 }
f2e2a985 6183 else
f502d50e
MM
6184 {
6185 if (slp)
6186 vec_oprnd = vec_oprnds[j];
6187 else
c079cbac 6188 {
81c40241 6189 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
6190 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6191 }
f502d50e 6192 }
04199738
RB
6193 /* Pun the vector to extract from if necessary. */
6194 if (lvectype != vectype)
6195 {
6196 tree tem = make_ssa_name (lvectype);
6197 gimple *pun
6198 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6199 lvectype, vec_oprnd));
6200 vect_finish_stmt_generation (stmt, pun, gsi);
6201 vec_oprnd = tem;
6202 }
f502d50e
MM
6203 for (i = 0; i < nstores; i++)
6204 {
6205 tree newref, newoff;
355fe088 6206 gimple *incr, *assign;
f502d50e
MM
6207 tree size = TYPE_SIZE (ltype);
6208 /* Extract the i'th component. */
6209 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6210 bitsize_int (i), size);
6211 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6212 size, pos);
6213
6214 elem = force_gimple_operand_gsi (gsi, elem, true,
6215 NULL_TREE, true,
6216 GSI_SAME_STMT);
6217
b17dc4d4
RB
6218 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6219 group_el * elsz);
f502d50e 6220 newref = build2 (MEM_REF, ltype,
b17dc4d4 6221 running_off, this_off);
f502d50e
MM
6222
6223 /* And store it to *running_off. */
6224 assign = gimple_build_assign (newref, elem);
6225 vect_finish_stmt_generation (stmt, assign, gsi);
6226
b17dc4d4
RB
6227 group_el += lnel;
6228 if (! slp
6229 || group_el == group_size)
6230 {
6231 newoff = copy_ssa_name (running_off, NULL);
6232 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6233 running_off, stride_step);
6234 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6235
b17dc4d4
RB
6236 running_off = newoff;
6237 group_el = 0;
6238 }
225ce44b
RB
6239 if (g == group_size - 1
6240 && !slp)
f502d50e
MM
6241 {
6242 if (j == 0 && i == 0)
225ce44b
RB
6243 STMT_VINFO_VEC_STMT (stmt_info)
6244 = *vec_stmt = assign;
f502d50e
MM
6245 else
6246 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6247 prev_stmt_info = vinfo_for_stmt (assign);
6248 }
6249 }
f2e2a985 6250 }
f502d50e 6251 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6252 if (slp)
6253 break;
f2e2a985 6254 }
778dd3b6
RB
6255
6256 vec_oprnds.release ();
f2e2a985
MM
6257 return true;
6258 }
6259
8c681247 6260 auto_vec<tree> dr_chain (group_size);
9771b263 6261 oprnds.create (group_size);
ebfd146a 6262
720f5239 6263 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6264 gcc_assert (alignment_support_scheme);
272c6793
RS
6265 /* Targets with store-lane instructions must not require explicit
6266 realignment. */
2de001ee 6267 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
6268 || alignment_support_scheme == dr_aligned
6269 || alignment_support_scheme == dr_unaligned_supported);
6270
62da9e14
RS
6271 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6272 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6273 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6274
2de001ee 6275 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
6276 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6277 else
6278 aggr_type = vectype;
ebfd146a
IR
6279
6280 /* In case the vectorization factor (VF) is bigger than the number
6281 of elements that we can fit in a vectype (nunits), we have to generate
6282 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 6283 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6284 vect_get_vec_def_for_copy_stmt. */
6285
0d0293ac 6286 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6287
6288 S1: &base + 2 = x2
6289 S2: &base = x0
6290 S3: &base + 1 = x1
6291 S4: &base + 3 = x3
6292
6293 We create vectorized stores starting from base address (the access of the
6294 first stmt in the chain (S2 in the above example), when the last store stmt
6295 of the chain (S4) is reached:
6296
6297 VS1: &base = vx2
6298 VS2: &base + vec_size*1 = vx0
6299 VS3: &base + vec_size*2 = vx1
6300 VS4: &base + vec_size*3 = vx3
6301
6302 Then permutation statements are generated:
6303
3fcc1b55
JJ
6304 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6305 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6306 ...
b8698a0f 6307
ebfd146a
IR
6308 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6309 (the order of the data-refs in the output of vect_permute_store_chain
6310 corresponds to the order of scalar stmts in the interleaving chain - see
6311 the documentation of vect_permute_store_chain()).
6312
6313 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6314 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6315 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6316 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6317 */
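/* Editor's note: an illustrative sketch, not part of this file.  The loop
   below is the scalar shape of the S1..S4 example above (group size 4);
   when the last store of the chain is reached the vectorizer emits one
   vector store per group member (VS1..VS4) followed by the VEC_PERM_EXPRs
   that interleave the lanes into memory order.  All names here are
   hypothetical.  */
void
interleaved_store_sketch (int *base, const int *x0, const int *x1,
                          const int *x2, const int *x3, int n)
{
  for (int i = 0; i < n; i++)
    {
      base[4 * i + 2] = x2[i];  /* S1: &base + 2 = x2 */
      base[4 * i + 0] = x0[i];  /* S2: &base     = x0 */
      base[4 * i + 1] = x1[i];  /* S3: &base + 1 = x1 */
      base[4 * i + 3] = x3[i];  /* S4: &base + 3 = x3 */
    }
}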
6318
6319 prev_stmt_info = NULL;
6320 for (j = 0; j < ncopies; j++)
6321 {
ebfd146a
IR
6322
6323 if (j == 0)
6324 {
6325 if (slp)
6326 {
6327 /* Get vectorized arguments for SLP_NODE. */
d092494c 6328 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6329 NULL, slp_node);
ebfd146a 6330
9771b263 6331 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6332 }
6333 else
6334 {
b8698a0f
L
6335 /* For interleaved stores we collect vectorized defs for all the
6336 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6337 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6338 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6339
0d0293ac 6340 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6341 OPRNDS are of size 1. */
b8698a0f 6342 next_stmt = first_stmt;
ebfd146a
IR
6343 for (i = 0; i < group_size; i++)
6344 {
b8698a0f
L
6345 /* Since gaps are not supported for interleaved stores,
6346 GROUP_SIZE is the exact number of stmts in the chain.
6347 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6348 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
6349 iteration of the loop will be executed. */
6350 gcc_assert (next_stmt
6351 && gimple_assign_single_p (next_stmt));
6352 op = gimple_assign_rhs1 (next_stmt);
6353
81c40241 6354 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6355 dr_chain.quick_push (vec_oprnd);
6356 oprnds.quick_push (vec_oprnd);
e14c1050 6357 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
6358 }
6359 }
6360
6361 /* We should have caught mismatched types earlier. */
6362 gcc_assert (useless_type_conversion_p (vectype,
6363 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6364 bool simd_lane_access_p
6365 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6366 if (simd_lane_access_p
6367 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6368 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6369 && integer_zerop (DR_OFFSET (first_dr))
6370 && integer_zerop (DR_INIT (first_dr))
6371 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6372 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6373 {
6374 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6375 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6376 inv_p = false;
74bf76ed
JJ
6377 }
6378 else
6379 dataref_ptr
6380 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6381 simd_lane_access_p ? loop : NULL,
09dfa495 6382 offset, &dummy, gsi, &ptr_incr,
74bf76ed 6383 simd_lane_access_p, &inv_p);
a70d6342 6384 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6385 }
b8698a0f 6386 else
ebfd146a 6387 {
b8698a0f
L
6388 /* For interleaved stores we created vectorized defs for all the
6389 defs stored in OPRNDS in the previous iteration (previous copy).
6390 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6391 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6392 next copy.
0d0293ac 6393 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6394 OPRNDS are of size 1. */
6395 for (i = 0; i < group_size; i++)
6396 {
9771b263 6397 op = oprnds[i];
81c40241 6398 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 6399 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
6400 dr_chain[i] = vec_oprnd;
6401 oprnds[i] = vec_oprnd;
ebfd146a 6402 }
74bf76ed
JJ
6403 if (dataref_offset)
6404 dataref_offset
6405 = int_const_binop (PLUS_EXPR, dataref_offset,
6406 TYPE_SIZE_UNIT (aggr_type));
6407 else
6408 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6409 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
6410 }
6411
2de001ee 6412 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6413 {
272c6793 6414 tree vec_array;
267d3070 6415
272c6793
RS
6416 /* Combine all the vectors into an array. */
6417 vec_array = create_vector_array (vectype, vec_num);
6418 for (i = 0; i < vec_num; i++)
c2d7ab2a 6419 {
9771b263 6420 vec_oprnd = dr_chain[i];
272c6793 6421 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6422 }
b8698a0f 6423
272c6793
RS
6424 /* Emit:
6425 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
44fc7854 6426 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
a844293d
RS
6427 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6428 vec_array);
6429 gimple_call_set_lhs (call, data_ref);
6430 gimple_call_set_nothrow (call, true);
6431 new_stmt = call;
267d3070 6432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6433 }
6434 else
6435 {
6436 new_stmt = NULL;
0d0293ac 6437 if (grouped_store)
272c6793 6438 {
b6b9227d
JJ
6439 if (j == 0)
6440 result_chain.create (group_size);
272c6793
RS
6441 /* Permute. */
6442 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6443 &result_chain);
6444 }
c2d7ab2a 6445
272c6793
RS
6446 next_stmt = first_stmt;
6447 for (i = 0; i < vec_num; i++)
6448 {
644ffefd 6449 unsigned align, misalign;
272c6793
RS
6450
6451 if (i > 0)
6452 /* Bump the vector pointer. */
6453 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6454 stmt, NULL_TREE);
6455
6456 if (slp)
9771b263 6457 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
6458 else if (grouped_store)
6459 /* For grouped stores vectorized defs are interleaved in
272c6793 6460 vect_permute_store_chain(). */
9771b263 6461 vec_oprnd = result_chain[i];
272c6793 6462
69a2e8a1 6463 data_ref = fold_build2 (MEM_REF, vectype,
aed93b23
RB
6464 dataref_ptr,
6465 dataref_offset
6466 ? dataref_offset
44fc7854 6467 : build_int_cst (ref_type, 0));
f702e7d4 6468 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 6469 if (aligned_access_p (first_dr))
644ffefd 6470 misalign = 0;
272c6793
RS
6471 else if (DR_MISALIGNMENT (first_dr) == -1)
6472 {
25f68d90 6473 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 6474 misalign = 0;
272c6793
RS
6475 TREE_TYPE (data_ref)
6476 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 6477 align * BITS_PER_UNIT);
272c6793
RS
6478 }
6479 else
6480 {
6481 TREE_TYPE (data_ref)
6482 = build_aligned_type (TREE_TYPE (data_ref),
6483 TYPE_ALIGN (elem_type));
644ffefd 6484 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6485 }
aed93b23
RB
6486 if (dataref_offset == NULL_TREE
6487 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
6488 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6489 misalign);
c2d7ab2a 6490
62da9e14 6491 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6492 {
6493 tree perm_mask = perm_mask_for_reverse (vectype);
6494 tree perm_dest
6495 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6496 vectype);
b731b390 6497 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
6498
6499 /* Generate the permute statement. */
355fe088 6500 gimple *perm_stmt
0d0e4a03
JJ
6501 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6502 vec_oprnd, perm_mask);
09dfa495
BM
6503 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6504
6505 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6506 vec_oprnd = new_temp;
6507 }
6508
272c6793
RS
6509 /* Arguments are ready. Create the new vector stmt. */
6510 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6512
6513 if (slp)
6514 continue;
6515
e14c1050 6516 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
6517 if (!next_stmt)
6518 break;
6519 }
ebfd146a 6520 }
1da0876c
RS
6521 if (!slp)
6522 {
6523 if (j == 0)
6524 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6525 else
6526 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6527 prev_stmt_info = vinfo_for_stmt (new_stmt);
6528 }
ebfd146a
IR
6529 }
6530
9771b263
DN
6531 oprnds.release ();
6532 result_chain.release ();
6533 vec_oprnds.release ();
ebfd146a
IR
6534
6535 return true;
6536}
6537
557be5a8
AL
6538/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6539 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 6540 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 6541 vect_gen_perm_mask_checked. */
a1e53f3f 6542
3fcc1b55 6543tree
4aae3cb3 6544vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 6545{
b00cb3bf 6546 tree mask_type;
a1e53f3f 6547
b00cb3bf
RS
6548 unsigned int nunits = sel.length ();
6549 gcc_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype));
6550
6551 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 6552 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
6553}
6554
7ac7e286 6555/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 6556 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
6557
6558tree
4aae3cb3 6559vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 6560{
7ac7e286 6561 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
6562 return vect_gen_perm_mask_any (vectype, sel);
6563}
6564
aec7ae7d
JJ
6565/* Given vector variables X and Y that were generated for the scalar
6566 STMT, generate instructions to permute the vector elements of X and Y
6567 using permutation mask MASK_VEC, insert them at *GSI and return the
6568 permuted vector variable. */
a1e53f3f
L
6569
6570static tree
355fe088 6571permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6572 gimple_stmt_iterator *gsi)
a1e53f3f
L
6573{
6574 tree vectype = TREE_TYPE (x);
aec7ae7d 6575 tree perm_dest, data_ref;
355fe088 6576 gimple *perm_stmt;
a1e53f3f 6577
acdcd61b 6578 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 6579 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
6580
6581 /* Generate the permute statement. */
0d0e4a03 6582 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
6583 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6584
6585 return data_ref;
6586}
6587
6b916b36
RB
6588/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6589 inserting them on the loop's preheader edge. Returns true if we
6590 were successful in doing so (and thus STMT can be moved then),
6591 otherwise returns false. */
6592
6593static bool
355fe088 6594hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
6595{
6596 ssa_op_iter i;
6597 tree op;
6598 bool any = false;
6599
6600 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6601 {
355fe088 6602 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6603 if (!gimple_nop_p (def_stmt)
6604 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6605 {
6606 /* Make sure we don't need to recurse. While we could do
6607 so in simple cases when there are more complex use webs
6608 we don't have an easy way to preserve stmt order to fulfil
6609 dependencies within them. */
6610 tree op2;
6611 ssa_op_iter i2;
d1417442
JJ
6612 if (gimple_code (def_stmt) == GIMPLE_PHI)
6613 return false;
6b916b36
RB
6614 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6615 {
355fe088 6616 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
6617 if (!gimple_nop_p (def_stmt2)
6618 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6619 return false;
6620 }
6621 any = true;
6622 }
6623 }
6624
6625 if (!any)
6626 return true;
6627
6628 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6629 {
355fe088 6630 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6631 if (!gimple_nop_p (def_stmt)
6632 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6633 {
6634 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6635 gsi_remove (&gsi, false);
6636 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6637 }
6638 }
6639
6640 return true;
6641}
6642
ebfd146a
IR
6643/* vectorizable_load.
6644
b8698a0f
L
6645 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6646 can be vectorized.
6647 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6648 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6649 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6650
6651static bool
355fe088 6652vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6653 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
6654{
6655 tree scalar_dest;
6656 tree vec_dest = NULL;
6657 tree data_ref = NULL;
6658 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6659 stmt_vec_info prev_stmt_info;
ebfd146a 6660 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6661 struct loop *loop = NULL;
ebfd146a 6662 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6663 bool nested_in_vect_loop = false;
c716e67f 6664 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6665 tree elem_type;
ebfd146a 6666 tree new_temp;
ef4bddc2 6667 machine_mode mode;
355fe088 6668 gimple *new_stmt = NULL;
ebfd146a
IR
6669 tree dummy;
6670 enum dr_alignment_support alignment_support_scheme;
6671 tree dataref_ptr = NULL_TREE;
74bf76ed 6672 tree dataref_offset = NULL_TREE;
355fe088 6673 gimple *ptr_incr = NULL;
ebfd146a 6674 int ncopies;
d9f21f6a
RS
6675 int i, j, group_size;
6676 poly_int64 group_gap_adj;
ebfd146a
IR
6677 tree msq = NULL_TREE, lsq;
6678 tree offset = NULL_TREE;
356bbc4c 6679 tree byte_offset = NULL_TREE;
ebfd146a 6680 tree realignment_token = NULL_TREE;
538dd0b7 6681 gphi *phi = NULL;
6e1aa848 6682 vec<tree> dr_chain = vNULL;
0d0293ac 6683 bool grouped_load = false;
355fe088 6684 gimple *first_stmt;
4f0a0218 6685 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
6686 bool inv_p;
6687 bool compute_in_loop = false;
6688 struct loop *at_loop;
6689 int vec_num;
6690 bool slp = (slp_node != NULL);
6691 bool slp_perm = false;
6692 enum tree_code code;
a70d6342 6693 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 6694 poly_uint64 vf;
272c6793 6695 tree aggr_type;
134c85ca 6696 gather_scatter_info gs_info;
310213d4 6697 vec_info *vinfo = stmt_info->vinfo;
44fc7854 6698 tree ref_type;
a70d6342 6699
465c8c19
JJ
6700 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6701 return false;
6702
66c16fd9
RB
6703 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6704 && ! vec_stmt)
465c8c19
JJ
6705 return false;
6706
6707 /* Is vectorizable load? */
6708 if (!is_gimple_assign (stmt))
6709 return false;
6710
6711 scalar_dest = gimple_assign_lhs (stmt);
6712 if (TREE_CODE (scalar_dest) != SSA_NAME)
6713 return false;
6714
6715 code = gimple_assign_rhs_code (stmt);
6716 if (code != ARRAY_REF
6717 && code != BIT_FIELD_REF
6718 && code != INDIRECT_REF
6719 && code != COMPONENT_REF
6720 && code != IMAGPART_EXPR
6721 && code != REALPART_EXPR
6722 && code != MEM_REF
6723 && TREE_CODE_CLASS (code) != tcc_declaration)
6724 return false;
6725
6726 if (!STMT_VINFO_DATA_REF (stmt_info))
6727 return false;
6728
6729 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6730 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6731
a70d6342
IR
6732 if (loop_vinfo)
6733 {
6734 loop = LOOP_VINFO_LOOP (loop_vinfo);
6735 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6736 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6737 }
6738 else
3533e503 6739 vf = 1;
ebfd146a
IR
6740
6741 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6742 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6743 case of SLP. */
fce57248 6744 if (slp)
ebfd146a
IR
6745 ncopies = 1;
6746 else
e8f142e2 6747 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
6748
6749 gcc_assert (ncopies >= 1);
6750
6751 /* FORNOW. This restriction should be relaxed. */
6752 if (nested_in_vect_loop && ncopies > 1)
6753 {
73fbfcad 6754 if (dump_enabled_p ())
78c60e3d 6755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6756 "multiple types in nested loop.\n");
ebfd146a
IR
6757 return false;
6758 }
6759
f2556b68
RB
6760 /* Invalidate assumptions made by dependence analysis when vectorization
6761 on the unrolled body effectively re-orders stmts. */
6762 if (ncopies > 1
6763 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6764 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6765 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6766 {
6767 if (dump_enabled_p ())
6768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6769 "cannot perform implicit CSE when unrolling "
6770 "with negative dependence distance\n");
6771 return false;
6772 }
6773
7b7b1813 6774 elem_type = TREE_TYPE (vectype);
947131ba 6775 mode = TYPE_MODE (vectype);
ebfd146a
IR
6776
6777 /* FORNOW. In some cases can vectorize even if data-type not supported
6778 (e.g. - data copies). */
947131ba 6779 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6780 {
73fbfcad 6781 if (dump_enabled_p ())
78c60e3d 6782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6783 "Aligned load, but unsupported type.\n");
ebfd146a
IR
6784 return false;
6785 }
6786
ebfd146a 6787 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6788 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6789 {
0d0293ac 6790 grouped_load = true;
ebfd146a 6791 /* FORNOW */
2de001ee
RS
6792 gcc_assert (!nested_in_vect_loop);
6793 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6794
e14c1050 6795 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 6796 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 6797
b1af7da6
RB
6798 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6799 slp_perm = true;
6800
f2556b68
RB
6801 /* Invalidate assumptions made by dependence analysis when vectorization
6802 on the unrolled body effectively re-orders stmts. */
6803 if (!PURE_SLP_STMT (stmt_info)
6804 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6805 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6806 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6807 {
6808 if (dump_enabled_p ())
6809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6810 "cannot perform implicit CSE when performing "
6811 "group loads with negative dependence distance\n");
6812 return false;
6813 }
96bb56b2
RB
6814
6815 /* Similarly when the stmt is a load that is both part of an SLP
6816 instance and a loop vectorized stmt via the same-dr mechanism
6817 we have to give up. */
6818 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6819 && (STMT_SLP_TYPE (stmt_info)
6820 != STMT_SLP_TYPE (vinfo_for_stmt
6821 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6822 {
6823 if (dump_enabled_p ())
6824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6825 "conflicting SLP types for CSEd load\n");
6826 return false;
6827 }
ebfd146a
IR
6828 }
6829
2de001ee 6830 vect_memory_access_type memory_access_type;
62da9e14 6831 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
2de001ee
RS
6832 &memory_access_type, &gs_info))
6833 return false;
a1e53f3f 6834
ebfd146a
IR
6835 if (!vec_stmt) /* transformation not required. */
6836 {
2de001ee
RS
6837 if (!slp)
6838 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 6839 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
6840 /* The SLP costs are calculated during SLP analysis. */
6841 if (!PURE_SLP_STMT (stmt_info))
2de001ee 6842 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 6843 NULL, NULL, NULL);
ebfd146a
IR
6844 return true;
6845 }
6846
2de001ee
RS
6847 if (!slp)
6848 gcc_assert (memory_access_type
6849 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6850
73fbfcad 6851 if (dump_enabled_p ())
78c60e3d 6852 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6853 "transform load. ncopies = %d\n", ncopies);
ebfd146a 6854
67b8dbac 6855 /* Transform. */
ebfd146a 6856
f702e7d4 6857 ensure_base_align (dr);
c716e67f 6858
2de001ee 6859 if (memory_access_type == VMAT_GATHER_SCATTER)
aec7ae7d
JJ
6860 {
6861 tree vec_oprnd0 = NULL_TREE, op;
134c85ca 6862 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
aec7ae7d 6863 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6864 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
aec7ae7d
JJ
6865 edge pe = loop_preheader_edge (loop);
6866 gimple_seq seq;
6867 basic_block new_bb;
6868 enum { NARROW, NONE, WIDEN } modifier;
134c85ca 6869 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
aec7ae7d
JJ
6870
6871 if (nunits == gather_off_nunits)
6872 modifier = NONE;
6873 else if (nunits == gather_off_nunits / 2)
6874 {
aec7ae7d
JJ
6875 modifier = WIDEN;
6876
e3342de4 6877 vec_perm_builder sel (gather_off_nunits, gather_off_nunits, 1);
aec7ae7d 6878 for (i = 0; i < gather_off_nunits; ++i)
908a1a16 6879 sel.quick_push (i | nunits);
aec7ae7d 6880
e3342de4
RS
6881 vec_perm_indices indices (sel, 1, gather_off_nunits);
6882 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6883 indices);
aec7ae7d
JJ
6884 }
6885 else if (nunits == gather_off_nunits * 2)
6886 {
aec7ae7d
JJ
6887 modifier = NARROW;
6888
e3342de4 6889 vec_perm_builder sel (nunits, nunits, 1);
aec7ae7d 6890 for (i = 0; i < nunits; ++i)
908a1a16
RS
6891 sel.quick_push (i < gather_off_nunits
6892 ? i : i + nunits - gather_off_nunits);
aec7ae7d 6893
e3342de4
RS
6894 vec_perm_indices indices (sel, 2, nunits);
6895 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
aec7ae7d
JJ
6896 ncopies *= 2;
6897 }
6898 else
6899 gcc_unreachable ();
6900
134c85ca 6901 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
aec7ae7d
JJ
6902 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6903 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6904 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6905 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6906 scaletype = TREE_VALUE (arglist);
d3c2fee0 6907 gcc_checking_assert (types_compatible_p (srctype, rettype));
aec7ae7d
JJ
6908
6909 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6910
134c85ca 6911 ptr = fold_convert (ptrtype, gs_info.base);
aec7ae7d
JJ
6912 if (!is_gimple_min_invariant (ptr))
6913 {
6914 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6915 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6916 gcc_assert (!new_bb);
6917 }
6918
6919 /* Currently we support only unconditional gather loads,
6920 so mask should be all ones. */
d3c2fee0
AI
6921 if (TREE_CODE (masktype) == INTEGER_TYPE)
6922 mask = build_int_cst (masktype, -1);
6923 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6924 {
6925 mask = build_int_cst (TREE_TYPE (masktype), -1);
6926 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6927 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6928 }
aec7ae7d
JJ
6929 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6930 {
6931 REAL_VALUE_TYPE r;
6932 long tmp[6];
6933 for (j = 0; j < 6; ++j)
6934 tmp[j] = -1;
6935 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6936 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6937 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6938 mask = vect_init_vector (stmt, mask, masktype, NULL);
aec7ae7d
JJ
6939 }
6940 else
6941 gcc_unreachable ();
aec7ae7d 6942
134c85ca 6943 scale = build_int_cst (scaletype, gs_info.scale);
aec7ae7d 6944
d3c2fee0
AI
6945 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6946 merge = build_int_cst (TREE_TYPE (rettype), 0);
6947 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6948 {
6949 REAL_VALUE_TYPE r;
6950 long tmp[6];
6951 for (j = 0; j < 6; ++j)
6952 tmp[j] = 0;
6953 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6954 merge = build_real (TREE_TYPE (rettype), r);
6955 }
6956 else
6957 gcc_unreachable ();
6958 merge = build_vector_from_val (rettype, merge);
6959 merge = vect_init_vector (stmt, merge, rettype, NULL);
6960
aec7ae7d
JJ
6961 prev_stmt_info = NULL;
6962 for (j = 0; j < ncopies; ++j)
6963 {
6964 if (modifier == WIDEN && (j & 1))
6965 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6966 perm_mask, stmt, gsi);
6967 else if (j == 0)
6968 op = vec_oprnd0
134c85ca 6969 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
aec7ae7d
JJ
6970 else
6971 op = vec_oprnd0
134c85ca 6972 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
aec7ae7d
JJ
6973
6974 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6975 {
6976 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6977 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 6978 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
aec7ae7d
JJ
6979 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6980 new_stmt
0d0e4a03 6981 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
6982 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6983 op = var;
6984 }
6985
6986 new_stmt
134c85ca 6987 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
aec7ae7d
JJ
6988
6989 if (!useless_type_conversion_p (vectype, rettype))
6990 {
6991 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6992 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 6993 op = vect_get_new_ssa_name (rettype, vect_simple_var);
aec7ae7d
JJ
6994 gimple_call_set_lhs (new_stmt, op);
6995 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 6996 var = make_ssa_name (vec_dest);
aec7ae7d
JJ
6997 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6998 new_stmt
0d0e4a03 6999 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
aec7ae7d
JJ
7000 }
7001 else
7002 {
7003 var = make_ssa_name (vec_dest, new_stmt);
7004 gimple_call_set_lhs (new_stmt, var);
7005 }
7006
7007 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7008
7009 if (modifier == NARROW)
7010 {
7011 if ((j & 1) == 0)
7012 {
7013 prev_res = var;
7014 continue;
7015 }
7016 var = permute_vec_elements (prev_res, var,
7017 perm_mask, stmt, gsi);
7018 new_stmt = SSA_NAME_DEF_STMT (var);
7019 }
7020
7021 if (prev_stmt_info == NULL)
7022 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7023 else
7024 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7025 prev_stmt_info = vinfo_for_stmt (new_stmt);
7026 }
7027 return true;
7028 }
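/* Editor's note: an illustrative sketch, not part of this file.  Scalar
   semantics of the masked gather call built above, ignoring the SCALE
   applied to the offsets and the WIDEN/NARROW lane adjustments: active
   lanes load from BASE + OFFSET[i], inactive lanes keep MERGE[i].  Only
   unconditional gathers are generated here, so MASK is all ones.  */
void
gather_semantics_sketch (int *res, const int *base, const int *offset,
                         const int *mask, const int *merge, int n)
{
  for (int i = 0; i < n; i++)
    res[i] = mask[i] ? base[offset[i]] : merge[i];
}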
2de001ee
RS
7029
7030 if (memory_access_type == VMAT_ELEMENTWISE
7031 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7032 {
7033 gimple_stmt_iterator incr_gsi;
7034 bool insert_after;
355fe088 7035 gimple *incr;
7d75abc8 7036 tree offvar;
7d75abc8
MM
7037 tree ivstep;
7038 tree running_off;
9771b263 7039 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 7040 gimple_seq stmts = NULL;
14ac6aa2
RB
7041 tree stride_base, stride_step, alias_off;
7042
7043 gcc_assert (!nested_in_vect_loop);
7d75abc8 7044
f502d50e 7045 if (slp && grouped_load)
44fc7854
BE
7046 {
7047 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7048 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7049 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7050 ref_type = get_group_alias_ptr_type (first_stmt);
7051 }
ab313a8c 7052 else
44fc7854
BE
7053 {
7054 first_stmt = stmt;
7055 first_dr = dr;
7056 group_size = 1;
7057 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7058 }
ab313a8c 7059
14ac6aa2
RB
7060 stride_base
7061 = fold_build_pointer_plus
ab313a8c 7062 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7063 size_binop (PLUS_EXPR,
ab313a8c
RB
7064 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7065 convert_to_ptrofftype (DR_INIT (first_dr))));
7066 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7067
7068 /* For a load with loop-invariant (but other than power-of-2)
7069 stride (i.e. not a grouped access) like so:
7070
7071 for (i = 0; i < n; i += stride)
7072 ... = array[i];
7073
7074 we generate a new induction variable and new accesses to
7075 form a new vector (or vectors, depending on ncopies):
7076
7077 for (j = 0; ; j += VF*stride)
7078 tmp1 = array[j];
7079 tmp2 = array[j + stride];
7080 ...
7081 vectemp = {tmp1, tmp2, ...}
7082 */
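/* Editor's note: an illustrative sketch, not part of this file, assuming a
   hypothetical four-int vector (VF = 4, ncopies = 1).  It spells out the
   scheme described above: one scalar load per lane, combined into a vector
   by a CONSTRUCTOR, with the induction variable advancing by VF * stride
   per vector iteration.  */
typedef int v4si_load_sketch __attribute__ ((vector_size (16)));

void
strided_load_sketch (v4si_load_sketch *out, const int *array, long stride,
                     long nvectors)
{
  long j = 0;
  for (long v = 0; v < nvectors; v++, j += 4 * stride)
    {
      v4si_load_sketch vectemp = { array[j], array[j + stride],
                                   array[j + 2 * stride],
                                   array[j + 3 * stride] };
      out[v] = vectemp;
    }
}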
7083
ab313a8c
RB
7084 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7085 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7086
7087 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7088
ab313a8c 7089 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7d75abc8
MM
7090 loop, &incr_gsi, insert_after,
7091 &offvar, NULL);
7092 incr = gsi_stmt (incr_gsi);
310213d4 7093 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7094
ab313a8c
RB
7095 stride_step = force_gimple_operand (unshare_expr (stride_step),
7096 &stmts, true, NULL_TREE);
7d75abc8
MM
7097 if (stmts)
7098 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7099
7100 prev_stmt_info = NULL;
7101 running_off = offvar;
44fc7854 7102 alias_off = build_int_cst (ref_type, 0);
7b5fc413 7103 int nloads = nunits;
e09b4c37 7104 int lnel = 1;
7b5fc413 7105 tree ltype = TREE_TYPE (vectype);
ea60dd34 7106 tree lvectype = vectype;
b266b968 7107 auto_vec<tree> dr_chain;
2de001ee 7108 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7109 {
2de001ee 7110 if (group_size < nunits)
e09b4c37 7111 {
ff03930a
JJ
7112 /* First check if vec_init optab supports construction from
7113 vector elts directly. */
b397965c 7114 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7115 machine_mode vmode;
7116 if (mode_for_vector (elmode, group_size).exists (&vmode)
7117 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7118 && (convert_optab_handler (vec_init_optab,
7119 TYPE_MODE (vectype), vmode)
7120 != CODE_FOR_nothing))
ea60dd34
RB
7121 {
7122 nloads = nunits / group_size;
7123 lnel = group_size;
ff03930a
JJ
7124 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7125 }
7126 else
7127 {
7128 /* Otherwise avoid emitting a constructor of vector elements
7129 by performing the loads using an integer type of the same
7130 size, constructing a vector of those and then
7131 re-interpreting it as the original vector type.
7132 This avoids a huge runtime penalty due to the general
7133 inability to perform store forwarding from smaller stores
7134 to a larger load. */
7135 unsigned lsize
7136 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7137 elmode = int_mode_for_size (lsize, 0).require ();
ff03930a
JJ
7138 /* If we can't construct such a vector fall back to
7139 element loads of the original vector type. */
9da15d40
RS
7140 if (mode_for_vector (elmode,
7141 nunits / group_size).exists (&vmode)
7142 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7143 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7144 != CODE_FOR_nothing))
7145 {
7146 nloads = nunits / group_size;
7147 lnel = group_size;
7148 ltype = build_nonstandard_integer_type (lsize, 1);
7149 lvectype = build_vector_type (ltype, nloads);
7150 }
ea60dd34 7151 }
e09b4c37 7152 }
2de001ee 7153 else
e09b4c37 7154 {
ea60dd34 7155 nloads = 1;
e09b4c37
RB
7156 lnel = nunits;
7157 ltype = vectype;
e09b4c37 7158 }
2de001ee
RS
7159 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7160 }
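/* Editor's note: an illustrative sketch, not part of this file, assuming a
   group size of two ints and a four-int vector.  It shows the fallback
   preferred above for VMAT_STRIDED_SLP: load each two-element group as one
   64-bit integer, build a two-element vector of those chunks, and
   reinterpret it as the original vector type (the VIEW_CONVERT_EXPR),
   instead of constructing the vector from four separate int loads.  */
#include <stdint.h>
#include <string.h>

typedef int32_t v4si_slp_sketch __attribute__ ((vector_size (16)));
typedef int64_t v2di_slp_sketch __attribute__ ((vector_size (16)));

v4si_slp_sketch
strided_slp_load_sketch (const int32_t *group0, const int32_t *group1)
{
  int64_t lo, hi;
  memcpy (&lo, group0, sizeof lo);  /* one 8-byte load covers group0[0..1] */
  memcpy (&hi, group1, sizeof hi);  /* one 8-byte load covers group1[0..1] */
  v2di_slp_sketch chunks = { lo, hi };
  v4si_slp_sketch result;
  memcpy (&result, &chunks, sizeof result);  /* VIEW_CONVERT_EXPR */
  return result;
}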
7161 if (slp)
7162 {
66c16fd9
RB
7163 /* For SLP permutation support we need to load the whole group,
7164 not only the number of vector stmts the permutation result
7165 fits in. */
b266b968 7166 if (slp_perm)
66c16fd9 7167 {
d9f21f6a
RS
7168 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7169 variable VF. */
7170 unsigned int const_vf = vf.to_constant ();
7171 ncopies = (group_size * const_vf + nunits - 1) / nunits;
66c16fd9
RB
7172 dr_chain.create (ncopies);
7173 }
7174 else
7175 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7176 }
e09b4c37
RB
7177 int group_el = 0;
7178 unsigned HOST_WIDE_INT
7179 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7180 for (j = 0; j < ncopies; j++)
7181 {
7b5fc413 7182 if (nloads > 1)
e09b4c37
RB
7183 vec_alloc (v, nloads);
7184 for (i = 0; i < nloads; i++)
7b5fc413 7185 {
e09b4c37
RB
7186 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7187 group_el * elsz);
7188 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7189 build2 (MEM_REF, ltype,
7190 running_off, this_off));
7191 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7192 if (nloads > 1)
7193 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7194 gimple_assign_lhs (new_stmt));
7195
7196 group_el += lnel;
7197 if (! slp
7198 || group_el == group_size)
7b5fc413 7199 {
e09b4c37
RB
7200 tree newoff = copy_ssa_name (running_off);
7201 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7202 running_off, stride_step);
7b5fc413
RB
7203 vect_finish_stmt_generation (stmt, incr, gsi);
7204
7205 running_off = newoff;
e09b4c37 7206 group_el = 0;
7b5fc413 7207 }
7b5fc413 7208 }
e09b4c37 7209 if (nloads > 1)
7d75abc8 7210 {
ea60dd34
RB
7211 tree vec_inv = build_constructor (lvectype, v);
7212 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7213 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7214 if (lvectype != vectype)
7215 {
7216 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7217 VIEW_CONVERT_EXPR,
7218 build1 (VIEW_CONVERT_EXPR,
7219 vectype, new_temp));
7220 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7221 }
7d75abc8
MM
7222 }
7223
7b5fc413 7224 if (slp)
b266b968 7225 {
b266b968
RB
7226 if (slp_perm)
7227 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7228 else
7229 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7230 }
7d75abc8 7231 else
225ce44b
RB
7232 {
7233 if (j == 0)
7234 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7235 else
7236 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7237 prev_stmt_info = vinfo_for_stmt (new_stmt);
7238 }
7d75abc8 7239 }
b266b968 7240 if (slp_perm)
29afecdf
RB
7241 {
7242 unsigned n_perms;
7243 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7244 slp_node_instance, false, &n_perms);
7245 }
7d75abc8
MM
7246 return true;
7247 }
aec7ae7d 7248
0d0293ac 7249 if (grouped_load)
ebfd146a 7250 {
e14c1050 7251 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7252 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7253 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7254 without permutation. */
7255 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7256 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7257 /* For BB vectorization always use the first stmt to base
7258 the data ref pointer on. */
7259 if (bb_vinfo)
7260 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7261
ebfd146a 7262 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7263 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7264 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7265 ??? But we can only do so if there is exactly one
7266 as we have no way to get at the rest. Leave the CSE
7267 opportunity alone.
7268 ??? With the group load eventually participating
7269 in multiple different permutations (having multiple
7270 slp nodes which refer to the same group) the CSE
7271 is even wrong code. See PR56270. */
7272 && !slp)
ebfd146a
IR
7273 {
7274 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7275 return true;
7276 }
7277 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7278 group_gap_adj = 0;
ebfd146a
IR
7279
7280 /* VEC_NUM is the number of vect stmts to be created for this group. */
7281 if (slp)
7282 {
0d0293ac 7283 grouped_load = false;
91ff1504
RB
7284 /* For SLP permutation support we need to load the whole group,
7285 not only the number of vector stmts the permutation result
7286 fits in. */
7287 if (slp_perm)
b267968e 7288 {
d9f21f6a
RS
7289 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7290 variable VF. */
7291 unsigned int const_vf = vf.to_constant ();
7292 vec_num = (group_size * const_vf + nunits - 1) / nunits;
b267968e
RB
7293 group_gap_adj = vf * group_size - nunits * vec_num;
7294 }
91ff1504 7295 else
b267968e
RB
7296 {
7297 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7298 group_gap_adj
7299 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7300 }
a70d6342 7301 }
ebfd146a 7302 else
9b999e8c 7303 vec_num = group_size;
44fc7854
BE
7304
7305 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7306 }
7307 else
7308 {
7309 first_stmt = stmt;
7310 first_dr = dr;
7311 group_size = vec_num = 1;
9b999e8c 7312 group_gap_adj = 0;
44fc7854 7313 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7314 }
7315
720f5239 7316 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7317 gcc_assert (alignment_support_scheme);
272c6793
RS
7318 /* Targets with load-lane instructions must not require explicit
7319 realignment. */
2de001ee 7320 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
7321 || alignment_support_scheme == dr_aligned
7322 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7323
7324 /* In case the vectorization factor (VF) is bigger than the number
7325 of elements that we can fit in a vectype (nunits), we have to generate
7326 more than one vector stmt - i.e., we need to "unroll" the
ff802fa1 7327 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7328 from one copy of the vector stmt to the next, in the field
ff802fa1 7329 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7330 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7331 stmts that use the defs of the current stmt. The example below
7332 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7333 need to create 4 vectorized stmts):
ebfd146a
IR
7334
7335 before vectorization:
7336 RELATED_STMT VEC_STMT
7337 S1: x = memref - -
7338 S2: z = x + 1 - -
7339
7340 step 1: vectorize stmt S1:
7341 We first create the vector stmt VS1_0, and, as usual, record a
7342 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7343 Next, we create the vector stmt VS1_1, and record a pointer to
7344 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7345 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7346 stmts and pointers:
7347 RELATED_STMT VEC_STMT
7348 VS1_0: vx0 = memref0 VS1_1 -
7349 VS1_1: vx1 = memref1 VS1_2 -
7350 VS1_2: vx2 = memref2 VS1_3 -
7351 VS1_3: vx3 = memref3 - -
7352 S1: x = load - VS1_0
7353 S2: z = x + 1 - -
7354
b8698a0f
L
7355 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7356 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
7357 stmt S2. */
7358
0d0293ac 7359 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7360
7361 S1: x2 = &base + 2
7362 S2: x0 = &base
7363 S3: x1 = &base + 1
7364 S4: x3 = &base + 3
7365
b8698a0f 7366 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7367 starting from the access of the first stmt of the chain:
7368
7369 VS1: vx0 = &base
7370 VS2: vx1 = &base + vec_size*1
7371 VS3: vx3 = &base + vec_size*2
7372 VS4: vx4 = &base + vec_size*3
7373
7374 Then permutation statements are generated:
7375
e2c83630
RH
7376 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7377 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7378 ...
7379
7380 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7381 (the order of the data-refs in the output of vect_permute_load_chain
7382 corresponds to the order of scalar stmts in the interleaving chain - see
7383 the documentation of vect_permute_load_chain()).
7384 The generation of permutation stmts and recording them in
0d0293ac 7385 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7386
b8698a0f 7387 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7388 permutation stmts above are created for every copy. The result vector
7389 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7390 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
7391
7392 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7393 on a target that supports unaligned accesses (dr_unaligned_supported)
7394 we generate the following code:
7395 p = initial_addr;
7396 indx = 0;
7397 loop {
7398 p = p + indx * vectype_size;
7399 vec_dest = *(p);
7400 indx = indx + 1;
7401 }
7402
7403 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7404 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7405 then generate the following code, in which the data in each iteration is
7406 obtained by two vector loads, one from the previous iteration, and one
7407 from the current iteration:
7408 p1 = initial_addr;
7409 msq_init = *(floor(p1))
7410 p2 = initial_addr + VS - 1;
7411 realignment_token = call target_builtin;
7412 indx = 0;
7413 loop {
7414 p2 = p2 + indx * vectype_size
7415 lsq = *(floor(p2))
7416 vec_dest = realign_load (msq, lsq, realignment_token)
7417 indx = indx + 1;
7418 msq = lsq;
7419 } */
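/* Editor's note: an illustrative sketch, not part of this file, for a
   16-byte vector of four ints.  It mimics the realignment scheme above:
   each potentially misaligned vector is assembled from two aligned loads
   that straddle it, so no memory access is ever misaligned.  The scalar
   lane selection stands in for the target's REALIGN_LOAD with the
   realignment token; the real loop also reuses LSQ as the next MSQ.  */
#include <stdint.h>

typedef int32_t v4si_realign_sketch __attribute__ ((vector_size (16)));

v4si_realign_sketch
realign_load_sketch (const int32_t *p)          /* P may be misaligned */
{
  uintptr_t addr = (uintptr_t) p;
  const v4si_realign_sketch *p1
    = (const v4si_realign_sketch *) (addr & -(uintptr_t) 16);
  const v4si_realign_sketch *p2
    = (const v4si_realign_sketch *) ((addr + 15) & -(uintptr_t) 16);
  unsigned shift = (addr & 15) / sizeof (int32_t);
  v4si_realign_sketch msq = *p1;        /* msq = *(floor (p))          */
  v4si_realign_sketch lsq = *p2;        /* lsq = *(floor (p + VS - 1)) */
  v4si_realign_sketch res;
  for (unsigned i = 0; i < 4; i++)      /* realign_load (msq, lsq, token) */
    res[i] = (i + shift < 4) ? msq[i + shift] : lsq[i + shift - 4];
  return res;
}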
7420
7421 /* If the misalignment remains the same throughout the execution of the
7422 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7423 preheader. Otherwise, they need to be created inside the loop.
ebfd146a
IR
7424 This can only occur when vectorizing memory accesses in the inner-loop
7425 nested within an outer-loop that is being vectorized. */
7426
d1e4b493 7427 if (nested_in_vect_loop
832b4117 7428 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
ebfd146a
IR
7429 {
7430 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7431 compute_in_loop = true;
7432 }
7433
7434 if ((alignment_support_scheme == dr_explicit_realign_optimized
7435 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7436 && !compute_in_loop)
ebfd146a
IR
7437 {
7438 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7439 alignment_support_scheme, NULL_TREE,
7440 &at_loop);
7441 if (alignment_support_scheme == dr_explicit_realign_optimized)
7442 {
538dd0b7 7443 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7444 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7445 size_one_node);
ebfd146a
IR
7446 }
7447 }
7448 else
7449 at_loop = loop;
7450
62da9e14 7451 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7452 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7453
2de001ee 7454 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
7455 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7456 else
7457 aggr_type = vectype;
7458
ebfd146a 7459 prev_stmt_info = NULL;
b267968e 7460 int group_elt = 0;
ebfd146a 7461 for (j = 0; j < ncopies; j++)
b8698a0f 7462 {
272c6793 7463 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7464 if (j == 0)
74bf76ed
JJ
7465 {
7466 bool simd_lane_access_p
7467 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7468 if (simd_lane_access_p
7469 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7470 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7471 && integer_zerop (DR_OFFSET (first_dr))
7472 && integer_zerop (DR_INIT (first_dr))
7473 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7474 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
7475 && (alignment_support_scheme == dr_aligned
7476 || alignment_support_scheme == dr_unaligned_supported))
7477 {
7478 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7479 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7480 inv_p = false;
74bf76ed 7481 }
4f0a0218
RB
7482 else if (first_stmt_for_drptr
7483 && first_stmt != first_stmt_for_drptr)
7484 {
7485 dataref_ptr
7486 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7487 at_loop, offset, &dummy, gsi,
7488 &ptr_incr, simd_lane_access_p,
7489 &inv_p, byte_offset);
7490 /* Adjust the pointer by the difference to first_stmt. */
7491 data_reference_p ptrdr
7492 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7493 tree diff = fold_convert (sizetype,
7494 size_binop (MINUS_EXPR,
7495 DR_INIT (first_dr),
7496 DR_INIT (ptrdr)));
7497 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7498 stmt, diff);
7499 }
74bf76ed
JJ
7500 else
7501 dataref_ptr
7502 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7503 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
7504 simd_lane_access_p, &inv_p,
7505 byte_offset);
74bf76ed
JJ
7506 }
7507 else if (dataref_offset)
7508 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7509 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7510 else
272c6793
RS
7511 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7512 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7513
0d0293ac 7514 if (grouped_load || slp_perm)
9771b263 7515 dr_chain.create (vec_num);
5ce1ee7f 7516
2de001ee 7517 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7518 {
272c6793
RS
7519 tree vec_array;
7520
7521 vec_array = create_vector_array (vectype, vec_num);
7522
7523 /* Emit:
7524 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
44fc7854 7525 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
a844293d
RS
7526 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7527 data_ref);
7528 gimple_call_set_lhs (call, vec_array);
7529 gimple_call_set_nothrow (call, true);
7530 new_stmt = call;
272c6793 7531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7532
272c6793
RS
7533 /* Extract each vector into an SSA_NAME. */
7534 for (i = 0; i < vec_num; i++)
ebfd146a 7535 {
272c6793
RS
7536 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7537 vec_array, i);
9771b263 7538 dr_chain.quick_push (new_temp);
272c6793
RS
7539 }
7540
7541 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7542 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
7543 }
7544 else
7545 {
7546 for (i = 0; i < vec_num; i++)
7547 {
7548 if (i > 0)
7549 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7550 stmt, NULL_TREE);
7551
7552 /* 2. Create the vector-load in the loop. */
7553 switch (alignment_support_scheme)
7554 {
7555 case dr_aligned:
7556 case dr_unaligned_supported:
be1ac4ec 7557 {
644ffefd
MJ
7558 unsigned int align, misalign;
7559
272c6793 7560 data_ref
aed93b23
RB
7561 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7562 dataref_offset
7563 ? dataref_offset
44fc7854 7564 : build_int_cst (ref_type, 0));
f702e7d4 7565 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
7566 if (alignment_support_scheme == dr_aligned)
7567 {
7568 gcc_assert (aligned_access_p (first_dr));
644ffefd 7569 misalign = 0;
272c6793
RS
7570 }
7571 else if (DR_MISALIGNMENT (first_dr) == -1)
7572 {
25f68d90 7573 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7574 misalign = 0;
272c6793
RS
7575 TREE_TYPE (data_ref)
7576 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 7577 align * BITS_PER_UNIT);
272c6793
RS
7578 }
7579 else
7580 {
7581 TREE_TYPE (data_ref)
7582 = build_aligned_type (TREE_TYPE (data_ref),
7583 TYPE_ALIGN (elem_type));
644ffefd 7584 misalign = DR_MISALIGNMENT (first_dr);
272c6793 7585 }
aed93b23
RB
7586 if (dataref_offset == NULL_TREE
7587 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7588 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7589 align, misalign);
272c6793 7590 break;
be1ac4ec 7591 }
272c6793 7592 case dr_explicit_realign:
267d3070 7593 {
272c6793 7594 tree ptr, bump;
272c6793 7595
d88981fc 7596 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
7597
7598 if (compute_in_loop)
7599 msq = vect_setup_realignment (first_stmt, gsi,
7600 &realignment_token,
7601 dr_explicit_realign,
7602 dataref_ptr, NULL);
7603
aed93b23
RB
7604 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7605 ptr = copy_ssa_name (dataref_ptr);
7606 else
7607 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 7608 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
7609 new_stmt = gimple_build_assign
7610 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
7611 build_int_cst
7612 (TREE_TYPE (dataref_ptr),
f702e7d4 7613 -(HOST_WIDE_INT) align));
272c6793
RS
7614 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7615 data_ref
7616 = build2 (MEM_REF, vectype, ptr,
44fc7854 7617 build_int_cst (ref_type, 0));
272c6793
RS
7618 vec_dest = vect_create_destination_var (scalar_dest,
7619 vectype);
7620 new_stmt = gimple_build_assign (vec_dest, data_ref);
7621 new_temp = make_ssa_name (vec_dest, new_stmt);
7622 gimple_assign_set_lhs (new_stmt, new_temp);
7623 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7624 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7625 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7626 msq = new_temp;
7627
d88981fc 7628 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7629 TYPE_SIZE_UNIT (elem_type));
d88981fc 7630 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7631 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
7632 new_stmt = gimple_build_assign
7633 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 7634 build_int_cst
f702e7d4 7635 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 7636 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
7637 gimple_assign_set_lhs (new_stmt, ptr);
7638 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7639 data_ref
7640 = build2 (MEM_REF, vectype, ptr,
44fc7854 7641 build_int_cst (ref_type, 0));
272c6793 7642 break;
267d3070 7643 }
272c6793 7644 case dr_explicit_realign_optimized:
f702e7d4
RS
7645 {
7646 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7647 new_temp = copy_ssa_name (dataref_ptr);
7648 else
7649 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7650 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7651 new_stmt = gimple_build_assign
7652 (new_temp, BIT_AND_EXPR, dataref_ptr,
7653 build_int_cst (TREE_TYPE (dataref_ptr),
7654 -(HOST_WIDE_INT) align));
7655 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7656 data_ref
7657 = build2 (MEM_REF, vectype, new_temp,
7658 build_int_cst (ref_type, 0));
7659 break;
7660 }
272c6793
RS
7661 default:
7662 gcc_unreachable ();
7663 }
ebfd146a 7664 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 7665 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a
IR
7666 new_temp = make_ssa_name (vec_dest, new_stmt);
7667 gimple_assign_set_lhs (new_stmt, new_temp);
7668 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7669
272c6793
RS
7670 /* 3. Handle explicit realignment if necessary/supported.
7671 Create in loop:
7672 vec_dest = realign_load (msq, lsq, realignment_token) */
7673 if (alignment_support_scheme == dr_explicit_realign_optimized
7674 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7675 {
272c6793
RS
7676 lsq = gimple_assign_lhs (new_stmt);
7677 if (!realignment_token)
7678 realignment_token = dataref_ptr;
7679 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
7680 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7681 msq, lsq, realignment_token);
272c6793
RS
7682 new_temp = make_ssa_name (vec_dest, new_stmt);
7683 gimple_assign_set_lhs (new_stmt, new_temp);
7684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7685
7686 if (alignment_support_scheme == dr_explicit_realign_optimized)
7687 {
7688 gcc_assert (phi);
7689 if (i == vec_num - 1 && j == ncopies - 1)
7690 add_phi_arg (phi, lsq,
7691 loop_latch_edge (containing_loop),
9e227d60 7692 UNKNOWN_LOCATION);
272c6793
RS
7693 msq = lsq;
7694 }
ebfd146a 7695 }
ebfd146a 7696
59fd17e3
RB
7697 /* 4. Handle invariant-load. */
7698 if (inv_p && !bb_vinfo)
7699 {
59fd17e3 7700 gcc_assert (!grouped_load);
d1417442
JJ
7701 /* If we have versioned for aliasing or the loop doesn't
7702 have any data dependencies that would preclude this,
7703 then we are sure this is a loop invariant load and
7704 thus we can insert it on the preheader edge. */
7705 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7706 && !nested_in_vect_loop
6b916b36 7707 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
7708 {
7709 if (dump_enabled_p ())
7710 {
7711 dump_printf_loc (MSG_NOTE, vect_location,
7712 "hoisting out of the vectorized "
7713 "loop: ");
7714 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7715 }
b731b390 7716 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
7717 gsi_insert_on_edge_immediate
7718 (loop_preheader_edge (loop),
7719 gimple_build_assign (tem,
7720 unshare_expr
7721 (gimple_assign_rhs1 (stmt))));
7722 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
7723 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7724 set_vinfo_for_stmt (new_stmt,
7725 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
7726 }
7727 else
7728 {
7729 gimple_stmt_iterator gsi2 = *gsi;
7730 gsi_next (&gsi2);
7731 new_temp = vect_init_vector (stmt, scalar_dest,
7732 vectype, &gsi2);
34cd48e5 7733 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7734 }
59fd17e3
RB
7735 }
7736
62da9e14 7737 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 7738 {
aec7ae7d
JJ
7739 tree perm_mask = perm_mask_for_reverse (vectype);
7740 new_temp = permute_vec_elements (new_temp, new_temp,
7741 perm_mask, stmt, gsi);
ebfd146a
IR
7742 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7743 }
267d3070 7744
272c6793 7745 /* Collect vector loads and later create their permutation in
0d0293ac
MM
7746 vect_transform_grouped_load (). */
7747 if (grouped_load || slp_perm)
9771b263 7748 dr_chain.quick_push (new_temp);
267d3070 7749
272c6793
RS
7750 /* Store vector loads in the corresponding SLP_NODE. */
7751 if (slp && !slp_perm)
9771b263 7752 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
7753
7754 /* With SLP permutation we load the gaps as well; without it
7755 we need to skip the gaps after we manage to fully load
7756 all elements. group_gap_adj is GROUP_SIZE here. */
7757 group_elt += nunits;
d9f21f6a
RS
7758 if (maybe_ne (group_gap_adj, 0U)
7759 && !slp_perm
7760 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 7761 {
d9f21f6a
RS
7762 poly_wide_int bump_val
7763 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7764 * group_gap_adj);
8e6cdc90 7765 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
7766 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7767 stmt, bump);
7768 group_elt = 0;
7769 }
272c6793 7770 }
9b999e8c
RB
7771 /* Bump the vector pointer to account for a gap or for excess
7772 elements loaded for a permuted SLP load. */
d9f21f6a 7773 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 7774 {
d9f21f6a
RS
7775 poly_wide_int bump_val
7776 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7777 * group_gap_adj);
8e6cdc90 7778 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
7779 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7780 stmt, bump);
7781 }
ebfd146a
IR
7782 }
7783
7784 if (slp && !slp_perm)
7785 continue;
7786
7787 if (slp_perm)
7788 {
29afecdf 7789 unsigned n_perms;
01d8bf07 7790 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
7791 slp_node_instance, false,
7792 &n_perms))
ebfd146a 7793 {
9771b263 7794 dr_chain.release ();
ebfd146a
IR
7795 return false;
7796 }
7797 }
7798 else
7799 {
0d0293ac 7800 if (grouped_load)
ebfd146a 7801 {
2de001ee 7802 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 7803 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7804 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
7805 }
7806 else
7807 {
7808 if (j == 0)
7809 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7810 else
7811 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7812 prev_stmt_info = vinfo_for_stmt (new_stmt);
7813 }
7814 }
9771b263 7815 dr_chain.release ();
ebfd146a
IR
7816 }
7817
ebfd146a
IR
7818 return true;
7819}
7820
7821/* Function vect_is_simple_cond.
b8698a0f 7822
ebfd146a
IR
7823 Input:
7824 LOOP - the loop that is being vectorized.
7825 COND - Condition that is checked for simple use.
7826
e9e1d143
RG
7827 Output:
7828 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 7829 *DTS - The def types for the arguments of the comparison
e9e1d143 7830
ebfd146a
IR
7831 Returns whether a COND can be vectorized. Checks whether
7832 condition operands are supportable using vec_is_simple_use. */
7833
87aab9b2 7834static bool
4fc5ebf1 7835vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
7836 tree *comp_vectype, enum vect_def_type *dts,
7837 tree vectype)
ebfd146a
IR
7838{
7839 tree lhs, rhs;
e9e1d143 7840 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7841
a414c77f
IE
7842 /* Mask case. */
7843 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 7844 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
7845 {
7846 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7847 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 7848 &dts[0], comp_vectype)
a414c77f
IE
7849 || !*comp_vectype
7850 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7851 return false;
7852 return true;
7853 }
7854
ebfd146a
IR
7855 if (!COMPARISON_CLASS_P (cond))
7856 return false;
7857
7858 lhs = TREE_OPERAND (cond, 0);
7859 rhs = TREE_OPERAND (cond, 1);
7860
7861 if (TREE_CODE (lhs) == SSA_NAME)
7862 {
355fe088 7863 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 7864 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
7865 return false;
7866 }
4fc5ebf1
JG
7867 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7868 || TREE_CODE (lhs) == FIXED_CST)
7869 dts[0] = vect_constant_def;
7870 else
ebfd146a
IR
7871 return false;
7872
7873 if (TREE_CODE (rhs) == SSA_NAME)
7874 {
355fe088 7875 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 7876 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
7877 return false;
7878 }
4fc5ebf1
JG
7879 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7880 || TREE_CODE (rhs) == FIXED_CST)
7881 dts[1] = vect_constant_def;
7882 else
ebfd146a
IR
7883 return false;
7884
28b33016
IE
7885 if (vectype1 && vectype2
7886 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7887 return false;
7888
e9e1d143 7889 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
7890 /* Invariant comparison. */
7891 if (! *comp_vectype)
7892 {
7893 tree scalar_type = TREE_TYPE (lhs);
7894 /* If we can widen the comparison to match vectype do so. */
7895 if (INTEGRAL_TYPE_P (scalar_type)
7896 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7897 TYPE_SIZE (TREE_TYPE (vectype))))
7898 scalar_type = build_nonstandard_integer_type
7899 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7900 TYPE_UNSIGNED (scalar_type));
7901 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7902 }
7903
ebfd146a
IR
7904 return true;
7905}
7906
7907/* vectorizable_condition.
7908
b8698a0f
L
 7909    Check if STMT is a conditional modify expression that can be vectorized.
7910 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7911 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
7912 at GSI.
7913
 7914    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 7915    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
0ad23163 7916    the else clause if it is 2).
ebfd146a
IR
7917
7918 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
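/* Illustrative sketch (not part of the original sources): the scalar
   statement  x_1 = a_2 < b_3 ? c_4 : d_5  is replaced by something like
     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;
   operating on whole vectors va_2, vb_3, vc_4 and vd_5.  */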
7919
4bbe8262 7920bool
355fe088
TS
7921vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7922 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 7923 slp_tree slp_node)
ebfd146a
IR
7924{
7925 tree scalar_dest = NULL_TREE;
7926 tree vec_dest = NULL_TREE;
01216d27
JJ
7927 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7928 tree then_clause, else_clause;
ebfd146a 7929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 7930 tree comp_vectype = NULL_TREE;
ff802fa1
IR
7931 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7932 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 7933 tree vec_compare;
ebfd146a
IR
7934 tree new_temp;
7935 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
7936 enum vect_def_type dts[4]
7937 = {vect_unknown_def_type, vect_unknown_def_type,
7938 vect_unknown_def_type, vect_unknown_def_type};
7939 int ndts = 4;
f7e531cf 7940 int ncopies;
01216d27 7941 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 7942 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
7943 int i, j;
7944 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
7945 vec<tree> vec_oprnds0 = vNULL;
7946 vec<tree> vec_oprnds1 = vNULL;
7947 vec<tree> vec_oprnds2 = vNULL;
7948 vec<tree> vec_oprnds3 = vNULL;
74946978 7949 tree vec_cmp_type;
a414c77f 7950 bool masked = false;
b8698a0f 7951
f7e531cf
IR
7952 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7953 return false;
7954
af29617a
AH
7955 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7956 {
7957 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7958 return false;
ebfd146a 7959
af29617a
AH
7960 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7961 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7962 && reduc_def))
7963 return false;
ebfd146a 7964
af29617a
AH
7965 /* FORNOW: not yet supported. */
7966 if (STMT_VINFO_LIVE_P (stmt_info))
7967 {
7968 if (dump_enabled_p ())
7969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7970 "value used after loop.\n");
7971 return false;
7972 }
ebfd146a
IR
7973 }
7974
7975 /* Is vectorizable conditional operation? */
7976 if (!is_gimple_assign (stmt))
7977 return false;
7978
7979 code = gimple_assign_rhs_code (stmt);
7980
7981 if (code != COND_EXPR)
7982 return false;
7983
465c8c19 7984 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 7985 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 7986
fce57248 7987 if (slp_node)
465c8c19
JJ
7988 ncopies = 1;
7989 else
e8f142e2 7990 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
7991
7992 gcc_assert (ncopies >= 1);
7993 if (reduc_index && ncopies > 1)
7994 return false; /* FORNOW */
7995
4e71066d
RG
7996 cond_expr = gimple_assign_rhs1 (stmt);
7997 then_clause = gimple_assign_rhs2 (stmt);
7998 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 7999
4fc5ebf1 8000 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 8001 &comp_vectype, &dts[0], vectype)
e9e1d143 8002 || !comp_vectype)
ebfd146a
IR
8003 return false;
8004
81c40241 8005 gimple *def_stmt;
4fc5ebf1 8006 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8007 &vectype1))
8008 return false;
4fc5ebf1 8009 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8010 &vectype2))
ebfd146a 8011 return false;
2947d3b2
IE
8012
8013 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8014 return false;
8015
8016 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8017 return false;
8018
28b33016
IE
8019 masked = !COMPARISON_CLASS_P (cond_expr);
8020 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8021
74946978
MP
8022 if (vec_cmp_type == NULL_TREE)
8023 return false;
784fb9b3 8024
01216d27
JJ
8025 cond_code = TREE_CODE (cond_expr);
8026 if (!masked)
8027 {
8028 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8029 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8030 }
8031
8032 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8033 {
8034 /* Boolean values may have another representation in vectors
8035 and therefore we prefer bit operations over comparison for
8036 them (which also works for scalar masks). We store opcodes
8037 to use in bitop1 and bitop2. Statement is vectorized as
8038 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8039 depending on bitop1 and bitop2 arity. */
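      /* For example (an illustrative sketch, not in the original sources):
	 with boolean operands a and b,
	   a > b   becomes  a & ~b  (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR);
	   a == b  becomes  a ^ b  with the then/else clauses swapped
	   (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */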
8040 switch (cond_code)
8041 {
8042 case GT_EXPR:
8043 bitop1 = BIT_NOT_EXPR;
8044 bitop2 = BIT_AND_EXPR;
8045 break;
8046 case GE_EXPR:
8047 bitop1 = BIT_NOT_EXPR;
8048 bitop2 = BIT_IOR_EXPR;
8049 break;
8050 case LT_EXPR:
8051 bitop1 = BIT_NOT_EXPR;
8052 bitop2 = BIT_AND_EXPR;
8053 std::swap (cond_expr0, cond_expr1);
8054 break;
8055 case LE_EXPR:
8056 bitop1 = BIT_NOT_EXPR;
8057 bitop2 = BIT_IOR_EXPR;
8058 std::swap (cond_expr0, cond_expr1);
8059 break;
8060 case NE_EXPR:
8061 bitop1 = BIT_XOR_EXPR;
8062 break;
8063 case EQ_EXPR:
8064 bitop1 = BIT_XOR_EXPR;
8065 bitop2 = BIT_NOT_EXPR;
8066 break;
8067 default:
8068 return false;
8069 }
8070 cond_code = SSA_NAME;
8071 }
8072
b8698a0f 8073 if (!vec_stmt)
ebfd146a
IR
8074 {
8075 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8076 if (bitop1 != NOP_EXPR)
8077 {
8078 machine_mode mode = TYPE_MODE (comp_vectype);
8079 optab optab;
8080
8081 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8082 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8083 return false;
8084
8085 if (bitop2 != NOP_EXPR)
8086 {
8087 optab = optab_for_tree_code (bitop2, comp_vectype,
8088 optab_default);
8089 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8090 return false;
8091 }
8092 }
4fc5ebf1
JG
8093 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8094 cond_code))
8095 {
8096 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8097 return true;
8098 }
8099 return false;
ebfd146a
IR
8100 }
8101
f7e531cf
IR
8102 /* Transform. */
8103
8104 if (!slp_node)
8105 {
9771b263
DN
8106 vec_oprnds0.create (1);
8107 vec_oprnds1.create (1);
8108 vec_oprnds2.create (1);
8109 vec_oprnds3.create (1);
f7e531cf 8110 }
ebfd146a
IR
8111
8112 /* Handle def. */
8113 scalar_dest = gimple_assign_lhs (stmt);
8114 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8115
8116 /* Handle cond expr. */
a855b1b1
MM
8117 for (j = 0; j < ncopies; j++)
8118 {
538dd0b7 8119 gassign *new_stmt = NULL;
a855b1b1
MM
8120 if (j == 0)
8121 {
f7e531cf
IR
8122 if (slp_node)
8123 {
00f96dc9
TS
8124 auto_vec<tree, 4> ops;
8125 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8126
a414c77f 8127 if (masked)
01216d27 8128 ops.safe_push (cond_expr);
a414c77f
IE
8129 else
8130 {
01216d27
JJ
8131 ops.safe_push (cond_expr0);
8132 ops.safe_push (cond_expr1);
a414c77f 8133 }
9771b263
DN
8134 ops.safe_push (then_clause);
8135 ops.safe_push (else_clause);
306b0c92 8136 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8137 vec_oprnds3 = vec_defs.pop ();
8138 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8139 if (!masked)
8140 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8141 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8142 }
8143 else
8144 {
355fe088 8145 gimple *gtemp;
a414c77f
IE
8146 if (masked)
8147 {
8148 vec_cond_lhs
8149 = vect_get_vec_def_for_operand (cond_expr, stmt,
8150 comp_vectype);
8151 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8152 &gtemp, &dts[0]);
8153 }
8154 else
8155 {
01216d27
JJ
8156 vec_cond_lhs
8157 = vect_get_vec_def_for_operand (cond_expr0,
8158 stmt, comp_vectype);
8159 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8160
8161 vec_cond_rhs
8162 = vect_get_vec_def_for_operand (cond_expr1,
8163 stmt, comp_vectype);
8164 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8165 }
f7e531cf
IR
8166 if (reduc_index == 1)
8167 vec_then_clause = reduc_def;
8168 else
8169 {
8170 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8171 stmt);
8172 vect_is_simple_use (then_clause, loop_vinfo,
8173 &gtemp, &dts[2]);
f7e531cf
IR
8174 }
8175 if (reduc_index == 2)
8176 vec_else_clause = reduc_def;
8177 else
8178 {
8179 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8180 stmt);
8181 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8182 }
a855b1b1
MM
8183 }
8184 }
8185 else
8186 {
a414c77f
IE
8187 vec_cond_lhs
8188 = vect_get_vec_def_for_stmt_copy (dts[0],
8189 vec_oprnds0.pop ());
8190 if (!masked)
8191 vec_cond_rhs
8192 = vect_get_vec_def_for_stmt_copy (dts[1],
8193 vec_oprnds1.pop ());
8194
a855b1b1 8195 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8196 vec_oprnds2.pop ());
a855b1b1 8197 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8198 vec_oprnds3.pop ());
f7e531cf
IR
8199 }
8200
8201 if (!slp_node)
8202 {
9771b263 8203 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8204 if (!masked)
8205 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8206 vec_oprnds2.quick_push (vec_then_clause);
8207 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8208 }
8209
9dc3f7de 8210 /* Arguments are ready. Create the new vector stmt. */
9771b263 8211 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8212 {
9771b263
DN
8213 vec_then_clause = vec_oprnds2[i];
8214 vec_else_clause = vec_oprnds3[i];
a855b1b1 8215
a414c77f
IE
8216 if (masked)
8217 vec_compare = vec_cond_lhs;
8218 else
8219 {
8220 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8221 if (bitop1 == NOP_EXPR)
8222 vec_compare = build2 (cond_code, vec_cmp_type,
8223 vec_cond_lhs, vec_cond_rhs);
8224 else
8225 {
8226 new_temp = make_ssa_name (vec_cmp_type);
8227 if (bitop1 == BIT_NOT_EXPR)
8228 new_stmt = gimple_build_assign (new_temp, bitop1,
8229 vec_cond_rhs);
8230 else
8231 new_stmt
8232 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8233 vec_cond_rhs);
8234 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8235 if (bitop2 == NOP_EXPR)
8236 vec_compare = new_temp;
8237 else if (bitop2 == BIT_NOT_EXPR)
8238 {
8239 /* Instead of doing ~x ? y : z do x ? z : y. */
8240 vec_compare = new_temp;
8241 std::swap (vec_then_clause, vec_else_clause);
8242 }
8243 else
8244 {
8245 vec_compare = make_ssa_name (vec_cmp_type);
8246 new_stmt
8247 = gimple_build_assign (vec_compare, bitop2,
8248 vec_cond_lhs, new_temp);
8249 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8250 }
8251 }
a414c77f 8252 }
5958f9e2
JJ
8253 new_temp = make_ssa_name (vec_dest);
8254 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8255 vec_compare, vec_then_clause,
8256 vec_else_clause);
f7e531cf
IR
8257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8258 if (slp_node)
9771b263 8259 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8260 }
8261
8262 if (slp_node)
8263 continue;
8264
8265 if (j == 0)
8266 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8267 else
8268 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8269
8270 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8271 }
b8698a0f 8272
9771b263
DN
8273 vec_oprnds0.release ();
8274 vec_oprnds1.release ();
8275 vec_oprnds2.release ();
8276 vec_oprnds3.release ();
f7e531cf 8277
ebfd146a
IR
8278 return true;
8279}
8280
42fd8198
IE
8281/* vectorizable_comparison.
8282
 8283    Check if STMT is a comparison expression that can be vectorized.
8284 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8285 comparison, put it in VEC_STMT, and insert it at GSI.
8286
8287 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
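/* Illustrative sketch (not part of the original sources): the scalar
   statement  mask_1 = a_2 < b_3  is replaced by something like
     vmask_1 = va_2 < vb_3;
   where vmask_1 has a boolean vector (mask) type.  */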
8288
fce57248 8289static bool
42fd8198
IE
8290vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8291 gimple **vec_stmt, tree reduc_def,
8292 slp_tree slp_node)
8293{
8294 tree lhs, rhs1, rhs2;
8295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8296 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8297 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8298 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8299 tree new_temp;
8300 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8301 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8302 int ndts = 2;
42fd8198
IE
8303 unsigned nunits;
8304 int ncopies;
49e76ff1 8305 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8306 stmt_vec_info prev_stmt_info = NULL;
8307 int i, j;
8308 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8309 vec<tree> vec_oprnds0 = vNULL;
8310 vec<tree> vec_oprnds1 = vNULL;
8311 gimple *def_stmt;
8312 tree mask_type;
8313 tree mask;
8314
c245362b
IE
8315 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8316 return false;
8317
30480bcd 8318 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8319 return false;
8320
8321 mask_type = vectype;
8322 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8323
fce57248 8324 if (slp_node)
42fd8198
IE
8325 ncopies = 1;
8326 else
e8f142e2 8327 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8328
8329 gcc_assert (ncopies >= 1);
42fd8198
IE
8330 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8331 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8332 && reduc_def))
8333 return false;
8334
8335 if (STMT_VINFO_LIVE_P (stmt_info))
8336 {
8337 if (dump_enabled_p ())
8338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8339 "value used after loop.\n");
8340 return false;
8341 }
8342
8343 if (!is_gimple_assign (stmt))
8344 return false;
8345
8346 code = gimple_assign_rhs_code (stmt);
8347
8348 if (TREE_CODE_CLASS (code) != tcc_comparison)
8349 return false;
8350
8351 rhs1 = gimple_assign_rhs1 (stmt);
8352 rhs2 = gimple_assign_rhs2 (stmt);
8353
8354 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8355 &dts[0], &vectype1))
8356 return false;
8357
8358 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8359 &dts[1], &vectype2))
8360 return false;
8361
8362 if (vectype1 && vectype2
8363 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8364 return false;
8365
8366 vectype = vectype1 ? vectype1 : vectype2;
8367
8368 /* Invariant comparison. */
8369 if (!vectype)
8370 {
69a9a66f
RB
8371 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8372 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
42fd8198
IE
8373 return false;
8374 }
8375 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8376 return false;
8377
49e76ff1
IE
8378 /* Can't compare mask and non-mask types. */
8379 if (vectype1 && vectype2
8380 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8381 return false;
8382
8383 /* Boolean values may have another representation in vectors
8384 and therefore we prefer bit operations over comparison for
8385 them (which also works for scalar masks). We store opcodes
8386 to use in bitop1 and bitop2. Statement is vectorized as
8387 BITOP2 (rhs1 BITOP1 rhs2) or
8388 rhs1 BITOP2 (BITOP1 rhs2)
8389 depending on bitop1 and bitop2 arity. */
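  /* For example (an illustrative sketch, not in the original sources):
     for boolean operands,  a < b  is rewritten with swapped operands as
     b & ~a  (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR), and  a == b
     as  ~(a ^ b)  (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */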
8390 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8391 {
8392 if (code == GT_EXPR)
8393 {
8394 bitop1 = BIT_NOT_EXPR;
8395 bitop2 = BIT_AND_EXPR;
8396 }
8397 else if (code == GE_EXPR)
8398 {
8399 bitop1 = BIT_NOT_EXPR;
8400 bitop2 = BIT_IOR_EXPR;
8401 }
8402 else if (code == LT_EXPR)
8403 {
8404 bitop1 = BIT_NOT_EXPR;
8405 bitop2 = BIT_AND_EXPR;
8406 std::swap (rhs1, rhs2);
264d951a 8407 std::swap (dts[0], dts[1]);
49e76ff1
IE
8408 }
8409 else if (code == LE_EXPR)
8410 {
8411 bitop1 = BIT_NOT_EXPR;
8412 bitop2 = BIT_IOR_EXPR;
8413 std::swap (rhs1, rhs2);
264d951a 8414 std::swap (dts[0], dts[1]);
49e76ff1
IE
8415 }
8416 else
8417 {
8418 bitop1 = BIT_XOR_EXPR;
8419 if (code == EQ_EXPR)
8420 bitop2 = BIT_NOT_EXPR;
8421 }
8422 }
8423
42fd8198
IE
8424 if (!vec_stmt)
8425 {
8426 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
49e76ff1 8427 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
4fc5ebf1 8428 dts, ndts, NULL, NULL);
49e76ff1 8429 if (bitop1 == NOP_EXPR)
96592eed 8430 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
8431 else
8432 {
8433 machine_mode mode = TYPE_MODE (vectype);
8434 optab optab;
8435
8436 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8437 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8438 return false;
8439
8440 if (bitop2 != NOP_EXPR)
8441 {
8442 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8443 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8444 return false;
8445 }
8446 return true;
8447 }
42fd8198
IE
8448 }
8449
8450 /* Transform. */
8451 if (!slp_node)
8452 {
8453 vec_oprnds0.create (1);
8454 vec_oprnds1.create (1);
8455 }
8456
8457 /* Handle def. */
8458 lhs = gimple_assign_lhs (stmt);
8459 mask = vect_create_destination_var (lhs, mask_type);
8460
8461 /* Handle cmp expr. */
8462 for (j = 0; j < ncopies; j++)
8463 {
8464 gassign *new_stmt = NULL;
8465 if (j == 0)
8466 {
8467 if (slp_node)
8468 {
8469 auto_vec<tree, 2> ops;
8470 auto_vec<vec<tree>, 2> vec_defs;
8471
8472 ops.safe_push (rhs1);
8473 ops.safe_push (rhs2);
306b0c92 8474 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
8475 vec_oprnds1 = vec_defs.pop ();
8476 vec_oprnds0 = vec_defs.pop ();
8477 }
8478 else
8479 {
e4af0bc4
IE
8480 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8481 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
8482 }
8483 }
8484 else
8485 {
8486 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8487 vec_oprnds0.pop ());
8488 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8489 vec_oprnds1.pop ());
8490 }
8491
8492 if (!slp_node)
8493 {
8494 vec_oprnds0.quick_push (vec_rhs1);
8495 vec_oprnds1.quick_push (vec_rhs2);
8496 }
8497
8498 /* Arguments are ready. Create the new vector stmt. */
8499 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8500 {
8501 vec_rhs2 = vec_oprnds1[i];
8502
8503 new_temp = make_ssa_name (mask);
49e76ff1
IE
8504 if (bitop1 == NOP_EXPR)
8505 {
8506 new_stmt = gimple_build_assign (new_temp, code,
8507 vec_rhs1, vec_rhs2);
8508 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8509 }
8510 else
8511 {
8512 if (bitop1 == BIT_NOT_EXPR)
8513 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8514 else
8515 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8516 vec_rhs2);
8517 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8518 if (bitop2 != NOP_EXPR)
8519 {
8520 tree res = make_ssa_name (mask);
8521 if (bitop2 == BIT_NOT_EXPR)
8522 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8523 else
8524 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8525 new_temp);
8526 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8527 }
8528 }
42fd8198
IE
8529 if (slp_node)
8530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8531 }
8532
8533 if (slp_node)
8534 continue;
8535
8536 if (j == 0)
8537 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8538 else
8539 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8540
8541 prev_stmt_info = vinfo_for_stmt (new_stmt);
8542 }
8543
8544 vec_oprnds0.release ();
8545 vec_oprnds1.release ();
8546
8547 return true;
8548}
ebfd146a 8549
68a0f2ff
RS
8550/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8551 can handle all live statements in the node. Otherwise return true
8552 if STMT is not live or if vectorizable_live_operation can handle it.
8553 GSI and VEC_STMT are as for vectorizable_live_operation. */
8554
8555static bool
8556can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8557 slp_tree slp_node, gimple **vec_stmt)
8558{
8559 if (slp_node)
8560 {
8561 gimple *slp_stmt;
8562 unsigned int i;
8563 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8564 {
8565 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8566 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8567 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8568 vec_stmt))
8569 return false;
8570 }
8571 }
8572 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8573 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8574 return false;
8575
8576 return true;
8577}
8578
8644a673 8579/* Make sure the statement is vectorizable. */
ebfd146a
IR
8580
8581bool
891ad31c
RB
8582vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8583 slp_instance node_instance)
ebfd146a 8584{
8644a673 8585 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 8586 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 8587 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 8588 bool ok;
355fe088 8589 gimple *pattern_stmt;
363477c0 8590 gimple_seq pattern_def_seq;
ebfd146a 8591
73fbfcad 8592 if (dump_enabled_p ())
ebfd146a 8593 {
78c60e3d
SS
8594 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8595 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 8596 }
ebfd146a 8597
1825a1f3 8598 if (gimple_has_volatile_ops (stmt))
b8698a0f 8599 {
73fbfcad 8600 if (dump_enabled_p ())
78c60e3d 8601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8602 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
8603
8604 return false;
8605 }
b8698a0f
L
8606
8607 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
8608 to include:
8609 - the COND_EXPR which is the loop exit condition
8610 - any LABEL_EXPRs in the loop
b8698a0f 8611 - computations that are used only for array indexing or loop control.
8644a673 8612 In basic blocks we only analyze statements that are a part of some SLP
83197f37 8613 instance, therefore, all the statements are relevant.
ebfd146a 8614
d092494c 8615 Pattern statement needs to be analyzed instead of the original statement
83197f37 8616 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
 8617     statements.  In basic blocks we are called from some SLP instance
 8618     traversal; don't analyze pattern stmts instead, as the pattern stmts
 8619     will already be part of the SLP instance.  */
83197f37
IR
8620
8621 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 8622 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 8623 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 8624 {
9d5e7640 8625 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 8626 && pattern_stmt
9d5e7640
IR
8627 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8628 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8629 {
83197f37 8630 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
8631 stmt = pattern_stmt;
8632 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 8633 if (dump_enabled_p ())
9d5e7640 8634 {
78c60e3d
SS
8635 dump_printf_loc (MSG_NOTE, vect_location,
8636 "==> examining pattern statement: ");
8637 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
8638 }
8639 }
8640 else
8641 {
73fbfcad 8642 if (dump_enabled_p ())
e645e942 8643 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 8644
9d5e7640
IR
8645 return true;
8646 }
8644a673 8647 }
83197f37 8648 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 8649 && node == NULL
83197f37
IR
8650 && pattern_stmt
8651 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8652 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8653 {
8654 /* Analyze PATTERN_STMT too. */
73fbfcad 8655 if (dump_enabled_p ())
83197f37 8656 {
78c60e3d
SS
8657 dump_printf_loc (MSG_NOTE, vect_location,
8658 "==> examining pattern statement: ");
8659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
8660 }
8661
891ad31c
RB
8662 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8663 node_instance))
83197f37
IR
8664 return false;
8665 }
ebfd146a 8666
1107f3ae 8667 if (is_pattern_stmt_p (stmt_info)
079c527f 8668 && node == NULL
363477c0 8669 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8670 {
363477c0 8671 gimple_stmt_iterator si;
1107f3ae 8672
363477c0
JJ
8673 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8674 {
355fe088 8675 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
8676 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8677 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8678 {
8679 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8680 if (dump_enabled_p ())
363477c0 8681 {
78c60e3d
SS
8682 dump_printf_loc (MSG_NOTE, vect_location,
8683 "==> examining pattern def statement: ");
8684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8685 }
1107f3ae 8686
363477c0 8687 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 8688 need_to_vectorize, node, node_instance))
363477c0
JJ
8689 return false;
8690 }
8691 }
8692 }
1107f3ae 8693
8644a673
IR
8694 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8695 {
8696 case vect_internal_def:
8697 break;
ebfd146a 8698
8644a673 8699 case vect_reduction_def:
7c5222ff 8700 case vect_nested_cycle:
14a61437
RB
8701 gcc_assert (!bb_vinfo
8702 && (relevance == vect_used_in_outer
8703 || relevance == vect_used_in_outer_by_reduction
8704 || relevance == vect_used_by_reduction
b28ead45
AH
8705 || relevance == vect_unused_in_scope
8706 || relevance == vect_used_only_live));
8644a673
IR
8707 break;
8708
8709 case vect_induction_def:
e7baeb39
RB
8710 gcc_assert (!bb_vinfo);
8711 break;
8712
8644a673
IR
8713 case vect_constant_def:
8714 case vect_external_def:
8715 case vect_unknown_def_type:
8716 default:
8717 gcc_unreachable ();
8718 }
ebfd146a 8719
8644a673 8720 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 8721 {
8644a673 8722 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
8723 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8724 || (is_gimple_call (stmt)
8725 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 8726 *need_to_vectorize = true;
ebfd146a
IR
8727 }
8728
b1af7da6
RB
8729 if (PURE_SLP_STMT (stmt_info) && !node)
8730 {
8731 dump_printf_loc (MSG_NOTE, vect_location,
8732 "handled only by SLP analysis\n");
8733 return true;
8734 }
8735
8736 ok = true;
8737 if (!bb_vinfo
8738 && (STMT_VINFO_RELEVANT_P (stmt_info)
8739 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8740 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8741 || vectorizable_conversion (stmt, NULL, NULL, node)
8742 || vectorizable_shift (stmt, NULL, NULL, node)
8743 || vectorizable_operation (stmt, NULL, NULL, node)
8744 || vectorizable_assignment (stmt, NULL, NULL, node)
8745 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8746 || vectorizable_call (stmt, NULL, NULL, node)
8747 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 8748 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 8749 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
8750 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8751 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
8752 else
8753 {
8754 if (bb_vinfo)
8755 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8756 || vectorizable_conversion (stmt, NULL, NULL, node)
8757 || vectorizable_shift (stmt, NULL, NULL, node)
8758 || vectorizable_operation (stmt, NULL, NULL, node)
8759 || vectorizable_assignment (stmt, NULL, NULL, node)
8760 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8761 || vectorizable_call (stmt, NULL, NULL, node)
8762 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
8763 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8764 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 8765 }
8644a673
IR
8766
8767 if (!ok)
ebfd146a 8768 {
73fbfcad 8769 if (dump_enabled_p ())
8644a673 8770 {
78c60e3d
SS
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8772 "not vectorized: relevant stmt not ");
8773 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8774 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8775 }
b8698a0f 8776
ebfd146a
IR
8777 return false;
8778 }
8779
a70d6342
IR
8780 if (bb_vinfo)
8781 return true;
8782
8644a673
IR
 8783   /* Stmts that are (also) "live" (i.e., that are used out of the loop)
8784 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
8785 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8786 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 8787 {
73fbfcad 8788 if (dump_enabled_p ())
8644a673 8789 {
78c60e3d 8790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 8791 "not vectorized: live stmt not supported: ");
78c60e3d 8792 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8793 }
b8698a0f 8794
8644a673 8795 return false;
ebfd146a
IR
8796 }
8797
ebfd146a
IR
8798 return true;
8799}
8800
8801
8802/* Function vect_transform_stmt.
8803
8804 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8805
8806bool
355fe088 8807vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 8808 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
8809 slp_instance slp_node_instance)
8810{
8811 bool is_store = false;
355fe088 8812 gimple *vec_stmt = NULL;
ebfd146a 8813 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 8814 bool done;
ebfd146a 8815
fce57248 8816 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 8817 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 8818
ebfd146a
IR
8819 switch (STMT_VINFO_TYPE (stmt_info))
8820 {
8821 case type_demotion_vec_info_type:
ebfd146a 8822 case type_promotion_vec_info_type:
ebfd146a
IR
8823 case type_conversion_vec_info_type:
8824 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8825 gcc_assert (done);
8826 break;
8827
8828 case induc_vec_info_type:
e7baeb39 8829 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
8830 gcc_assert (done);
8831 break;
8832
9dc3f7de
IR
8833 case shift_vec_info_type:
8834 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8835 gcc_assert (done);
8836 break;
8837
ebfd146a
IR
8838 case op_vec_info_type:
8839 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8840 gcc_assert (done);
8841 break;
8842
8843 case assignment_vec_info_type:
8844 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8845 gcc_assert (done);
8846 break;
8847
8848 case load_vec_info_type:
b8698a0f 8849 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
8850 slp_node_instance);
8851 gcc_assert (done);
8852 break;
8853
8854 case store_vec_info_type:
8855 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8856 gcc_assert (done);
0d0293ac 8857 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
8858 {
8859 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 8860 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 8861 	     one are skipped, and their vec_stmt_info shouldn't be freed
8862 meanwhile. */
0d0293ac 8863 *grouped_store = true;
ebfd146a
IR
8864 if (STMT_VINFO_VEC_STMT (stmt_info))
8865 is_store = true;
8866 }
8867 else
8868 is_store = true;
8869 break;
8870
8871 case condition_vec_info_type:
f7e531cf 8872 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
8873 gcc_assert (done);
8874 break;
8875
42fd8198
IE
8876 case comparison_vec_info_type:
8877 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8878 gcc_assert (done);
8879 break;
8880
ebfd146a 8881 case call_vec_info_type:
190c2236 8882 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 8883 stmt = gsi_stmt (*gsi);
8e4284d0 8884 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
5ce9450f 8885 is_store = true;
ebfd146a
IR
8886 break;
8887
0136f8f0
AH
8888 case call_simd_clone_vec_info_type:
8889 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8890 stmt = gsi_stmt (*gsi);
8891 break;
8892
ebfd146a 8893 case reduc_vec_info_type:
891ad31c
RB
8894 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8895 slp_node_instance);
ebfd146a
IR
8896 gcc_assert (done);
8897 break;
8898
8899 default:
8900 if (!STMT_VINFO_LIVE_P (stmt_info))
8901 {
73fbfcad 8902 if (dump_enabled_p ())
78c60e3d 8903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8904 "stmt not supported.\n");
ebfd146a
IR
8905 gcc_unreachable ();
8906 }
8907 }
8908
225ce44b
RB
8909 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8910 This would break hybrid SLP vectorization. */
8911 if (slp_node)
d90f8440
RB
8912 gcc_assert (!vec_stmt
8913 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 8914
ebfd146a
IR
8915 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8916 is being vectorized, but outside the immediately enclosing loop. */
8917 if (vec_stmt
a70d6342
IR
8918 && STMT_VINFO_LOOP_VINFO (stmt_info)
8919 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8920 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
8921 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8922 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 8923 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 8924 vect_used_in_outer_by_reduction))
ebfd146a 8925 {
a70d6342
IR
8926 struct loop *innerloop = LOOP_VINFO_LOOP (
8927 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
8928 imm_use_iterator imm_iter;
8929 use_operand_p use_p;
8930 tree scalar_dest;
355fe088 8931 gimple *exit_phi;
ebfd146a 8932
73fbfcad 8933 if (dump_enabled_p ())
78c60e3d 8934 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 8935 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
8936
 8937       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8938 (to be used when vectorizing outer-loop stmts that use the DEF of
8939 STMT). */
8940 if (gimple_code (stmt) == GIMPLE_PHI)
8941 scalar_dest = PHI_RESULT (stmt);
8942 else
8943 scalar_dest = gimple_assign_lhs (stmt);
8944
8945 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8946 {
8947 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8948 {
8949 exit_phi = USE_STMT (use_p);
8950 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8951 }
8952 }
8953 }
8954
8955 /* Handle stmts whose DEF is used outside the loop-nest that is
8956 being vectorized. */
68a0f2ff 8957 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 8958 {
68a0f2ff 8959 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
ebfd146a
IR
8960 gcc_assert (done);
8961 }
8962
8963 if (vec_stmt)
83197f37 8964 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 8965
b8698a0f 8966 return is_store;
ebfd146a
IR
8967}
8968
8969
b8698a0f 8970/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
8971 stmt_vec_info. */
8972
8973void
355fe088 8974vect_remove_stores (gimple *first_stmt)
ebfd146a 8975{
355fe088
TS
8976 gimple *next = first_stmt;
8977 gimple *tmp;
ebfd146a
IR
8978 gimple_stmt_iterator next_si;
8979
8980 while (next)
8981 {
78048b1c
JJ
8982 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8983
8984 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8985 if (is_pattern_stmt_p (stmt_info))
8986 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
8987 /* Free the attached stmt_vec_info and remove the stmt. */
8988 next_si = gsi_for_stmt (next);
3d3f2249 8989 unlink_stmt_vdef (next);
ebfd146a 8990 gsi_remove (&next_si, true);
3d3f2249 8991 release_defs (next);
ebfd146a
IR
8992 free_stmt_vec_info (next);
8993 next = tmp;
8994 }
8995}
8996
8997
8998/* Function new_stmt_vec_info.
8999
9000 Create and initialize a new stmt_vec_info struct for STMT. */
9001
9002stmt_vec_info
310213d4 9003new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9004{
9005 stmt_vec_info res;
9006 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9007
9008 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9009 STMT_VINFO_STMT (res) = stmt;
310213d4 9010 res->vinfo = vinfo;
8644a673 9011 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9012 STMT_VINFO_LIVE_P (res) = false;
9013 STMT_VINFO_VECTYPE (res) = NULL;
9014 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9015 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9016 STMT_VINFO_IN_PATTERN_P (res) = false;
9017 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9018 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9019 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9020 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9021 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9022
ebfd146a
IR
9023 if (gimple_code (stmt) == GIMPLE_PHI
9024 && is_loop_header_bb_p (gimple_bb (stmt)))
9025 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9026 else
8644a673
IR
9027 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9028
9771b263 9029 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9030 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9031 STMT_VINFO_NUM_SLP_USES (res) = 0;
9032
e14c1050
IR
9033 GROUP_FIRST_ELEMENT (res) = NULL;
9034 GROUP_NEXT_ELEMENT (res) = NULL;
9035 GROUP_SIZE (res) = 0;
9036 GROUP_STORE_COUNT (res) = 0;
9037 GROUP_GAP (res) = 0;
9038 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
9039
9040 return res;
9041}
9042
9043
9044/* Create a hash table for stmt_vec_info. */
9045
9046void
9047init_stmt_vec_info_vec (void)
9048{
9771b263
DN
9049 gcc_assert (!stmt_vec_info_vec.exists ());
9050 stmt_vec_info_vec.create (50);
ebfd146a
IR
9051}
9052
9053
9054/* Free hash table for stmt_vec_info. */
9055
9056void
9057free_stmt_vec_info_vec (void)
9058{
93675444 9059 unsigned int i;
3161455c 9060 stmt_vec_info info;
93675444
JJ
9061 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9062 if (info != NULL)
3161455c 9063 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
9064 gcc_assert (stmt_vec_info_vec.exists ());
9065 stmt_vec_info_vec.release ();
ebfd146a
IR
9066}
9067
9068
9069/* Free stmt vectorization related info. */
9070
9071void
355fe088 9072free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9073{
9074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9075
9076 if (!stmt_info)
9077 return;
9078
78048b1c
JJ
9079 /* Check if this statement has a related "pattern stmt"
9080 (introduced by the vectorizer during the pattern recognition
9081 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9082 too. */
9083 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9084 {
9085 stmt_vec_info patt_info
9086 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9087 if (patt_info)
9088 {
363477c0 9089 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9090 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9091 gimple_set_bb (patt_stmt, NULL);
9092 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9093 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9094 release_ssa_name (lhs);
363477c0
JJ
9095 if (seq)
9096 {
9097 gimple_stmt_iterator si;
9098 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9099 {
355fe088 9100 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9101 gimple_set_bb (seq_stmt, NULL);
7532abf2 9102 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9103 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9104 release_ssa_name (lhs);
9105 free_stmt_vec_info (seq_stmt);
9106 }
363477c0 9107 }
f0281fde 9108 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9109 }
9110 }
9111
9771b263 9112 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9113 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9114 set_vinfo_for_stmt (stmt, NULL);
9115 free (stmt_info);
9116}
9117
9118
bb67d9c7 9119/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9120
bb67d9c7 9121 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9122 by the target. */
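/* Illustrative example (a sketch, assuming a 4-byte int and SIZE == 16
   bytes): the function would look up a 16-byte vector mode and return a
   4-element integer vector type (nunits = 16 / 4).  */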
9123
bb67d9c7
RG
9124static tree
9125get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a 9126{
c7d97b28 9127 tree orig_scalar_type = scalar_type;
3bd8f481 9128 scalar_mode inner_mode;
ef4bddc2 9129 machine_mode simd_mode;
ebfd146a
IR
9130 int nunits;
9131 tree vectype;
9132
3bd8f481
RS
9133 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9134 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9135 return NULL_TREE;
9136
3bd8f481 9137 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9138
7b7b1813
RG
9139 /* For vector types of elements whose mode precision doesn't
 9140      match their type's precision we use an element type of mode
9141 precision. The vectorization routines will have to make sure
48f2e373
RB
9142 they support the proper result truncation/extension.
9143 We also make sure to build vector types with INTEGER_TYPE
9144 component type only. */
6d7971b8 9145 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9146 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9147 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9148 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9149 TYPE_UNSIGNED (scalar_type));
6d7971b8 9150
ccbf5bb4
RG
9151 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9152 When the component mode passes the above test simply use a type
9153 corresponding to that mode. The theory is that any use that
9154 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9155 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9156 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9157 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9158
9159 /* We can't build a vector type of elements with alignment bigger than
9160 their size. */
dfc2e2ac 9161 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9162 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9163 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9164
dfc2e2ac
RB
 9165   /* If we fell back to using the mode, fail if there was
9166 no scalar type for it. */
9167 if (scalar_type == NULL_TREE)
9168 return NULL_TREE;
9169
bb67d9c7
RG
9170 /* If no size was supplied use the mode the target prefers. Otherwise
 9171      look up a vector mode of the specified size.  */
9172 if (size == 0)
9173 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9da15d40
RS
9174 else if (!mode_for_vector (inner_mode, size / nbytes).exists (&simd_mode))
9175 return NULL_TREE;
cc4b5170 9176 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
4c8fd8ac
JB
9177 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9178 if (nunits < 1)
cc4b5170 9179 return NULL_TREE;
ebfd146a
IR
9180
9181 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9182
9183 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9184 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9185 return NULL_TREE;
ebfd146a 9186
c7d97b28
RB
9187 /* Re-attach the address-space qualifier if we canonicalized the scalar
9188 type. */
9189 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9190 return build_qualified_type
9191 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9192
ebfd146a
IR
9193 return vectype;
9194}
9195
bb67d9c7
RG
9196unsigned int current_vector_size;
9197
9198/* Function get_vectype_for_scalar_type.
9199
9200 Returns the vector type corresponding to SCALAR_TYPE as supported
9201 by the target. */
9202
9203tree
9204get_vectype_for_scalar_type (tree scalar_type)
9205{
9206 tree vectype;
9207 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9208 current_vector_size);
9209 if (vectype
9210 && current_vector_size == 0)
9211 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9212 return vectype;
9213}
9214
42fd8198
IE
9215/* Function get_mask_type_for_scalar_type.
9216
9217 Returns the mask type corresponding to a result of comparison
9218 of vectors of specified SCALAR_TYPE as supported by target. */
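/* Illustrative example (a sketch, assuming a 4-byte int and a 16-byte
   current_vector_size): the result would be a 4-element boolean (mask)
   vector type, matching TYPE_VECTOR_SUBPARTS of the corresponding value
   vector type.  */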
9219
9220tree
9221get_mask_type_for_scalar_type (tree scalar_type)
9222{
9223 tree vectype = get_vectype_for_scalar_type (scalar_type);
9224
9225 if (!vectype)
9226 return NULL;
9227
9228 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9229 current_vector_size);
9230}
9231
b690cc0f
RG
9232/* Function get_same_sized_vectype
9233
9234 Returns a vector type corresponding to SCALAR_TYPE of size
9235 VECTOR_TYPE if supported by the target. */
9236
9237tree
bb67d9c7 9238get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 9239{
2568d8a1 9240 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
9241 return build_same_sized_truth_vector_type (vector_type);
9242
bb67d9c7
RG
9243 return get_vectype_for_scalar_type_and_size
9244 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
9245}
9246
ebfd146a
IR
9247/* Function vect_is_simple_use.
9248
9249 Input:
81c40241
RB
9250 VINFO - the vect info of the loop or basic block that is being vectorized.
9251 OPERAND - operand in the loop or bb.
9252 Output:
9253 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9254 DT - the type of definition
ebfd146a
IR
9255
9256 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 9257 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 9258 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 9259 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
9260 is the case in reduction/induction computations).
9261 For basic blocks, supportable operands are constants and bb invariants.
9262 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
9263
9264bool
81c40241
RB
9265vect_is_simple_use (tree operand, vec_info *vinfo,
9266 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 9267{
ebfd146a 9268 *def_stmt = NULL;
3fc356dc 9269 *dt = vect_unknown_def_type;
b8698a0f 9270
73fbfcad 9271 if (dump_enabled_p ())
ebfd146a 9272 {
78c60e3d
SS
9273 dump_printf_loc (MSG_NOTE, vect_location,
9274 "vect_is_simple_use: operand ");
9275 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 9276 dump_printf (MSG_NOTE, "\n");
ebfd146a 9277 }
b8698a0f 9278
b758f602 9279 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
9280 {
9281 *dt = vect_constant_def;
9282 return true;
9283 }
b8698a0f 9284
ebfd146a
IR
9285 if (is_gimple_min_invariant (operand))
9286 {
8644a673 9287 *dt = vect_external_def;
ebfd146a
IR
9288 return true;
9289 }
9290
ebfd146a
IR
9291 if (TREE_CODE (operand) != SSA_NAME)
9292 {
73fbfcad 9293 if (dump_enabled_p ())
af29617a
AH
9294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9295 "not ssa-name.\n");
ebfd146a
IR
9296 return false;
9297 }
b8698a0f 9298
3fc356dc 9299 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 9300 {
3fc356dc
RB
9301 *dt = vect_external_def;
9302 return true;
ebfd146a
IR
9303 }
9304
3fc356dc 9305 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 9306 if (dump_enabled_p ())
ebfd146a 9307 {
78c60e3d
SS
9308 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9309 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
9310 }
9311
61d371eb 9312 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 9313 *dt = vect_external_def;
ebfd146a
IR
9314 else
9315 {
3fc356dc 9316 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 9317 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
9318 }
9319
2e8ab70c
RB
9320 if (dump_enabled_p ())
9321 {
9322 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9323 switch (*dt)
9324 {
9325 case vect_uninitialized_def:
9326 dump_printf (MSG_NOTE, "uninitialized\n");
9327 break;
9328 case vect_constant_def:
9329 dump_printf (MSG_NOTE, "constant\n");
9330 break;
9331 case vect_external_def:
9332 dump_printf (MSG_NOTE, "external\n");
9333 break;
9334 case vect_internal_def:
9335 dump_printf (MSG_NOTE, "internal\n");
9336 break;
9337 case vect_induction_def:
9338 dump_printf (MSG_NOTE, "induction\n");
9339 break;
9340 case vect_reduction_def:
9341 dump_printf (MSG_NOTE, "reduction\n");
9342 break;
9343 case vect_double_reduction_def:
9344 dump_printf (MSG_NOTE, "double reduction\n");
9345 break;
9346 case vect_nested_cycle:
9347 dump_printf (MSG_NOTE, "nested cycle\n");
9348 break;
9349 case vect_unknown_def_type:
9350 dump_printf (MSG_NOTE, "unknown\n");
9351 break;
9352 }
9353 }
9354
81c40241 9355 if (*dt == vect_unknown_def_type)
ebfd146a 9356 {
73fbfcad 9357 if (dump_enabled_p ())
78c60e3d 9358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9359 "Unsupported pattern.\n");
ebfd146a
IR
9360 return false;
9361 }
9362
ebfd146a
IR
9363 switch (gimple_code (*def_stmt))
9364 {
9365 case GIMPLE_PHI:
ebfd146a 9366 case GIMPLE_ASSIGN:
ebfd146a 9367 case GIMPLE_CALL:
81c40241 9368 break;
ebfd146a 9369 default:
73fbfcad 9370 if (dump_enabled_p ())
78c60e3d 9371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9372 "unsupported defining stmt:\n");
ebfd146a
IR
9373 return false;
9374 }
9375
9376 return true;
9377}
9378
81c40241 9379/* Function vect_is_simple_use.
b690cc0f 9380
81c40241 9381 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
9382 type of OPERAND and stores it to *VECTYPE. If the definition of
9383 OPERAND is vect_uninitialized_def, vect_constant_def or
 9384    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
 9385    is responsible for computing the best suited vector type for the
9386 scalar operand. */
9387
9388bool
81c40241
RB
9389vect_is_simple_use (tree operand, vec_info *vinfo,
9390 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 9391{
81c40241 9392 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
9393 return false;
9394
9395 /* Now get a vector type if the def is internal, otherwise supply
9396 NULL_TREE and leave it up to the caller to figure out a proper
9397 type for the use stmt. */
9398 if (*dt == vect_internal_def
9399 || *dt == vect_induction_def
9400 || *dt == vect_reduction_def
9401 || *dt == vect_double_reduction_def
9402 || *dt == vect_nested_cycle)
9403 {
9404 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
9405
9406 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9407 && !STMT_VINFO_RELEVANT (stmt_info)
9408 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 9409 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 9410
b690cc0f
RG
9411 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9412 gcc_assert (*vectype != NULL_TREE);
9413 }
9414 else if (*dt == vect_uninitialized_def
9415 || *dt == vect_constant_def
9416 || *dt == vect_external_def)
9417 *vectype = NULL_TREE;
9418 else
9419 gcc_unreachable ();
9420
9421 return true;
9422}
9423
ebfd146a
IR
9424
9425/* Function supportable_widening_operation
9426
b8698a0f
L
9427 Check whether an operation represented by the code CODE is a
9428 widening operation that is supported by the target platform in
b690cc0f
RG
9429 vector form (i.e., when operating on arguments of type VECTYPE_IN
9430 producing a result of type VECTYPE_OUT).
b8698a0f 9431
ebfd146a
IR
9432 Widening operations we currently support are NOP (CONVERT), FLOAT
9433 and WIDEN_MULT. This function checks if these operations are supported
9434 by the target platform either directly (via vector tree-codes), or via
9435 target builtins.
9436
9437 Output:
b8698a0f
L
9438 - CODE1 and CODE2 are codes of vector operations to be used when
9439 vectorizing the operation, if available.
ebfd146a
IR
9440 - MULTI_STEP_CVT determines the number of required intermediate steps in
9441 case of multi-step conversion (like char->short->int - in that case
9442 MULTI_STEP_CVT will be 1).
b8698a0f
L
9443 - INTERM_TYPES contains the intermediate type required to perform the
9444 widening operation (short in the above example). */
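/* Illustrative example (a sketch, not from the original sources): a
   char -> int conversion is vectorized with a VEC_UNPACK_LO_EXPR /
   VEC_UNPACK_HI_EXPR pair per step, going through short as the
   intermediate type, so MULTI_STEP_CVT == 1 and INTERM_TYPES holds the
   intermediate (short) vector type.  */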
ebfd146a
IR
9445
9446bool
355fe088 9447supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 9448 tree vectype_out, tree vectype_in,
ebfd146a
IR
9449 enum tree_code *code1, enum tree_code *code2,
9450 int *multi_step_cvt,
9771b263 9451 vec<tree> *interm_types)
ebfd146a
IR
9452{
9453 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9454 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 9455 struct loop *vect_loop = NULL;
ef4bddc2 9456 machine_mode vec_mode;
81f40b79 9457 enum insn_code icode1, icode2;
ebfd146a 9458 optab optab1, optab2;
b690cc0f
RG
9459 tree vectype = vectype_in;
9460 tree wide_vectype = vectype_out;
ebfd146a 9461 enum tree_code c1, c2;
4a00c761
JJ
9462 int i;
9463 tree prev_type, intermediate_type;
ef4bddc2 9464 machine_mode intermediate_mode, prev_mode;
4a00c761 9465 optab optab3, optab4;
ebfd146a 9466
4a00c761 9467 *multi_step_cvt = 0;
4ef69dfc
IR
9468 if (loop_info)
9469 vect_loop = LOOP_VINFO_LOOP (loop_info);
9470
ebfd146a
IR
9471 switch (code)
9472 {
9473 case WIDEN_MULT_EXPR:
6ae6116f
RH
9474 /* The result of a vectorized widening operation usually requires
9475 two vectors (because the widened results do not fit into one vector).
9476 The generated vector results would normally be expected to be
9477 generated in the same order as in the original scalar computation,
9478 i.e. if 8 results are generated in each vector iteration, they are
9479 to be organized as follows:
9480 vect1: [res1,res2,res3,res4],
9481 vect2: [res5,res6,res7,res8].
9482
9483 However, in the special case that the result of the widening
9484 operation is used in a reduction computation only, the order doesn't
9485 matter (because when vectorizing a reduction we change the order of
9486 the computation). Some targets can take advantage of this and
9487 generate more efficient code. For example, targets like Altivec,
9488 that support widen_mult using a sequence of {mult_even,mult_odd}
9489 generate the following vectors:
9490 vect1: [res1,res3,res5,res7],
9491 vect2: [res2,res4,res6,res8].
9492
9493 When vectorizing outer-loops, we execute the inner-loop sequentially
9494 (each vectorized inner-loop iteration contributes to VF outer-loop
9495 iterations in parallel). We therefore don't allow to change the
9496 order of the computation in the inner-loop during outer-loop
9497 vectorization. */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with the vect_used_by_reduction property
             cannot be reordered if the use chain with this property does not
             have the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean vector types
       having the same QImode.  Thus we add an additional check on
       the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
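
/* An illustrative caller-side sketch (the names and control flow below are
   placeholders, not a quote of any caller; in this file the real user of
   this interface is vectorizable_conversion):

     enum tree_code code1, code2;
     int multi_step_cvt;
     vec<tree> interm_types = vNULL;
     if (supportable_widening_operation (CONVERT_EXPR, stmt, vectype_out,
                                         vectype_in, &code1, &code2,
                                         &multi_step_cvt, &interm_types))
       {
         ... emit MULTI_STEP_CVT + 1 rounds of CODE1/CODE2 statements,
         stepping through INTERM_TYPES on the way from VECTYPE_IN to
         VECTYPE_OUT ...
       }
     interm_types.release ();  */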


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
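
/* For instance (an illustrative example, not tied to any particular
   target): an int -> char conversion that has to go through short would
   return CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
   INTERM_TYPES = { vector short }.  */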

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean vector types
       having the same QImode.  Thus we add an additional check on
       the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
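  /* (A signed intermediate step is presumably safe here because in a
     multi-step sequence the result is packed down to the narrower
     VECTYPE_OUT afterwards, and for inputs in range of that final type
     the signed and unsigned float-to-integer conversions agree.)  */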
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}