/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        {
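          /* For masked loads and stores and for internal gather/scatter
             calls, the mask, the stored value and the offset are genuine
             vector operands of the call rather than parts of an address
             computation, so the checks below report them as real uses.  */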
          internal_fn ifn = gimple_call_internal_fn (stmt);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (stmt, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (stmt, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (stmt, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
            gcc_unreachable ();
          if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
                               unsigned opno, enum vect_def_type dt,
                               stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
      /* ??? We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
                                 opno))
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          /* ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
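          /* Externals whose elements all turned out to be equal are
             costed as a single splat (scalar_to_vec), differing externals
             as a full vector construction, and constants as a load from
             the constant pool.  */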
          prologue_cost += record_stmt_cost
              (cost_vec, 1,
               dt == vect_external_def
               ? (elt ? scalar_to_vec : vec_construct)
               : vector_load,
               stmt_info, 0, vect_prologue);
          nelt = 0;
        }
    }

  return prologue_cost;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
         ??? This over-estimates cost for multiple uses and should be
         re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
        {
          tree op = gimple_op (stmt, i);
          gimple *def_stmt;
          enum vect_def_type dt;
          if (!op || op == lhs)
            continue;
          if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
              && (dt == vect_constant_def || dt == vect_external_def))
            prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
                                                            i, dt, cost_vec);
        }
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
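  /* Such nodes alternate two different operation codes, so twice the
     number of vector statements is generated, together with permutes
     that blend the two partial results.  */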
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr,
                                    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
                                       vec_promote_demote, stmt_info, 0,
                                       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
        prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
                                                        1, dt, cost_vec);
      else
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
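      /* The count models ceil_log2 (group_size) interleave steps, each
         of which touches all group_size vectors, for each of the
         ncopies copies.  */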
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
                                    slp_vf, instance, true,
                                    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
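      /* Only chunks of ASSUMED_NUNITS group elements that contain at
         least one index named by the load permutation need an actual
         vector load; the loop below counts those chunks.  */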
      auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
           i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
        bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
        {
          if (i % assumed_nunits == 0)
            {
              if (load_seen)
                ncopies++;
              load_seen = false;
            }
          if (bitmap_bit_p (perm, i))
            load_seen = true;
        }
      if (load_seen)
        ncopies++;
      gcc_assert (ncopies
                  <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
                      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
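      /* As for stores: ceil_log2 (group_size) extract/shuffle steps,
         each operating on group_size vectors, per copy.  */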
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
c83a894c
AH
1543/* Function vect_get_vec_def_for_operand.
1544
1545 OP is an operand in STMT. This function returns a (vector) def that will be
1546 used in the vectorized stmt for STMT.
1547
1548 In the case that OP is an SSA_NAME which is defined in the loop, then
1549 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1550
1551 In case OP is an invariant or constant, a new stmt that creates a vector def
1552 needs to be introduced. VECTYPE may be used to specify the required type
1553 of the vector invariant. */
1554
1555tree
1556vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1557{
1558 gimple *def_stmt;
1559 enum vect_def_type dt;
1560 bool is_simple_use;
1561 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1563
1564 if (dump_enabled_p ())
1565 {
1566 dump_printf_loc (MSG_NOTE, vect_location,
1567 "vect_get_vec_def_for_operand: ");
1568 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1569 dump_printf (MSG_NOTE, "\n");
1570 }
1571
1572 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1573 gcc_assert (is_simple_use);
1574 if (def_stmt && dump_enabled_p ())
1575 {
1576 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1577 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1578 }
1579
1580 if (dt == vect_constant_def || dt == vect_external_def)
1581 {
1582 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1583 tree vector_type;
1584
1585 if (vectype)
1586 vector_type = vectype;
2568d8a1 1587 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1588 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1589 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1590 else
1591 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1592
1593 gcc_assert (vector_type);
1594 return vect_init_vector (stmt, op, vector_type, NULL);
1595 }
1596 else
1597 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1598}
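/* Illustration (assumed values, not part of the original source): if OP is
   the integer constant 5, DT is vect_constant_def and the chosen vector type
   is a 4-element integer vector, the call above reduces to
   vect_init_vector (stmt, 5, vector_type, NULL), which emits code building
   the splat { 5, 5, 5, 5 } and returns its SSA name.  */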
1599
1600
ebfd146a
IR
1601/* Function vect_get_vec_def_for_stmt_copy
1602
ff802fa1 1603 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1604 vectorized stmt to be created (by the caller to this function) is a "copy"
1605 created when the vectorized result cannot fit in one vector, and several
ff802fa1 1606 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1607 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1608 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1609 DT is the definition type of the vector def VEC_OPRND.
1610
1611 Context:
1612 In case the vectorization factor (VF) is bigger than the number
1613 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1614 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1615 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1616 smallest data-type determines the VF, and as a result, when vectorizing
1617 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1618 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1619 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1620 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1621 which VF=16 and nunits=4, so the number of copies required is 4):
1622
1623 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1624
ebfd146a
IR
1625 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1626 VS1.1: vx.1 = memref1 VS1.2
1627 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1628 VS1.3: vx.3 = memref3
ebfd146a
IR
1629
1630 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1631 VSnew.1: vz1 = vx.1 + ... VSnew.2
1632 VSnew.2: vz2 = vx.2 + ... VSnew.3
1633 VSnew.3: vz3 = vx.3 + ...
1634
1635 The vectorization of S1 is explained in vectorizable_load.
1636 The vectorization of S2:
b8698a0f
L
1637 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1638 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1639 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1640 returns the vector-def 'vx.0'.
1641
b8698a0f
L
1642 To create the remaining copies of the vector-stmt (VSnew.j), this
1643 function is called to get the relevant vector-def for each operand. It is
1644 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1645 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1646
b8698a0f
L
1647 For example, to obtain the vector-def 'vx.1' in order to create the
1648 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1649 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1650 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1651 and return its def ('vx.1').
1652 Overall, to create the above sequence this function will be called 3 times:
1653 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1654 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1655 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1656
1657tree
1658vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1659{
355fe088 1660 gimple *vec_stmt_for_operand;
ebfd146a
IR
1661 stmt_vec_info def_stmt_info;
1662
1663 /* Do nothing; can reuse same def. */
8644a673 1664 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1665 return vec_oprnd;
1666
1667 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1668 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1669 gcc_assert (def_stmt_info);
1670 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1671 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1672 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1673 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1674 else
1675 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1676 return vec_oprnd;
1677}
1678
1679
1680/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1681 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1682
c78e3652 1683void
b8698a0f 1684vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1685 vec<tree> *vec_oprnds0,
1686 vec<tree> *vec_oprnds1)
ebfd146a 1687{
9771b263 1688 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1689
1690 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1691 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1692
9771b263 1693 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1694 {
9771b263 1695 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1696 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1697 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1698 }
1699}
1700
1701
c78e3652 1702/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1703
c78e3652 1704void
355fe088 1705vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1706 vec<tree> *vec_oprnds0,
1707 vec<tree> *vec_oprnds1,
306b0c92 1708 slp_tree slp_node)
ebfd146a
IR
1709{
1710 if (slp_node)
d092494c
IR
1711 {
1712 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1713 auto_vec<tree> ops (nops);
1714 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1715
9771b263 1716 ops.quick_push (op0);
d092494c 1717 if (op1)
9771b263 1718 ops.quick_push (op1);
d092494c 1719
306b0c92 1720 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1721
37b5ec8f 1722 *vec_oprnds0 = vec_defs[0];
d092494c 1723 if (op1)
37b5ec8f 1724 *vec_oprnds1 = vec_defs[1];
d092494c 1725 }
ebfd146a
IR
1726 else
1727 {
1728 tree vec_oprnd;
1729
9771b263 1730 vec_oprnds0->create (1);
81c40241 1731 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1732 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1733
1734 if (op1)
1735 {
9771b263 1736 vec_oprnds1->create (1);
81c40241 1737 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1738 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1739 }
1740 }
1741}
1742
bb6c2b68
RS
1743/* Helper function called by vect_finish_replace_stmt and
1744 vect_finish_stmt_generation. Set the location of the new
1745 statement and create a stmt_vec_info for it. */
1746
1747static void
1748vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1749{
1750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1751 vec_info *vinfo = stmt_info->vinfo;
1752
1753 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1754
1755 if (dump_enabled_p ())
1756 {
1757 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1758 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1759 }
1760
1761 gimple_set_location (vec_stmt, gimple_location (stmt));
1762
1763 /* While EH edges will generally prevent vectorization, stmt might
1764 e.g. be in a must-not-throw region. Ensure newly created stmts
1765 that could throw are part of the same region. */
1766 int lp_nr = lookup_stmt_eh_lp (stmt);
1767 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1768 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1769}
1770
1771/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1772 which sets the same scalar result as STMT did. */
1773
1774void
1775vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1776{
1777 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1778
1779 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1780 gsi_replace (&gsi, vec_stmt, false);
1781
1782 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1783}
ebfd146a
IR
1784
1785/* Function vect_finish_stmt_generation.
1786
1787 Insert the new vector statement VEC_STMT for STMT before GSI, keeping the virtual SSA operands up to date. */
1788
1789void
355fe088 1790vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1791 gimple_stmt_iterator *gsi)
1792{
ebfd146a
IR
1793 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1794
54e8e2c3
RG
1795 if (!gsi_end_p (*gsi)
1796 && gimple_has_mem_ops (vec_stmt))
1797 {
355fe088 1798 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1799 tree vuse = gimple_vuse (at_stmt);
1800 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1801 {
1802 tree vdef = gimple_vdef (at_stmt);
1803 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1804 /* If we have an SSA vuse and insert a store, update virtual
1805 SSA form to avoid triggering the renamer. Do so only
1806 if we can easily see all uses - which is what almost always
1807 happens with the way vectorized stmts are inserted. */
1808 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1809 && ((is_gimple_assign (vec_stmt)
1810 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1811 || (is_gimple_call (vec_stmt)
1812 && !(gimple_call_flags (vec_stmt)
1813 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1814 {
1815 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1816 gimple_set_vdef (vec_stmt, new_vdef);
1817 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1818 }
1819 }
1820 }
ebfd146a 1821 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1822 vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1823}
1824
70439f0d
RS
1825/* We want to vectorize a call to combined function CFN with function
1826 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1827 as the types of all inputs. Check whether this is possible using
1828 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1829
70439f0d
RS
1830static internal_fn
1831vectorizable_internal_function (combined_fn cfn, tree fndecl,
1832 tree vectype_out, tree vectype_in)
ebfd146a 1833{
70439f0d
RS
1834 internal_fn ifn;
1835 if (internal_fn_p (cfn))
1836 ifn = as_internal_fn (cfn);
1837 else
1838 ifn = associated_internal_fn (fndecl);
1839 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1840 {
1841 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1842 if (info.vectorizable)
1843 {
1844 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1845 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1846 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1847 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1848 return ifn;
1849 }
1850 }
1851 return IFN_LAST;
ebfd146a
IR
1852}
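/* Example of the mapping above (hedged; the exact optabs depend on the
   target): a call to sqrtf recognized as CFN_BUILT_IN_SQRTF is associated
   with IFN_SQRT.  If the target implements the corresponding optab for the
   vector mode of VECTYPE_IN, direct_internal_fn_supported_p succeeds and
   IFN_SQRT is returned; otherwise the result is IFN_LAST and the caller
   looks for other ways to vectorize the call.  */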
1853
5ce9450f 1854
355fe088 1855static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1856 gimple_stmt_iterator *);
1857
7cfb4d93
RS
1858/* Check whether a load or store statement in the loop described by
1859 LOOP_VINFO is possible in a fully-masked loop. This is testing
1860 whether the vectorizer pass has the appropriate support, as well as
1861 whether the target does.
1862
1863 VLS_TYPE says whether the statement is a load or store and VECTYPE
1864 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1865 says how the load or store is going to be implemented and GROUP_SIZE
1866 is the number of load or store statements in the containing group.
bfaa08b7
RS
1867 If the access is a gather load or scatter store, GS_INFO describes
1868 its arguments.
7cfb4d93
RS
1869
1870 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1871 supported, otherwise record the required mask types. */
1872
1873static void
1874check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1875 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1876 vect_memory_access_type memory_access_type,
1877 gather_scatter_info *gs_info)
7cfb4d93
RS
1878{
1879 /* Invariant loads need no special support. */
1880 if (memory_access_type == VMAT_INVARIANT)
1881 return;
1882
1883 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1884 machine_mode vecmode = TYPE_MODE (vectype);
1885 bool is_load = (vls_type == VLS_LOAD);
1886 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1887 {
1888 if (is_load
1889 ? !vect_load_lanes_supported (vectype, group_size, true)
1890 : !vect_store_lanes_supported (vectype, group_size, true))
1891 {
1892 if (dump_enabled_p ())
1893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1894 "can't use a fully-masked loop because the"
1895 " target doesn't have an appropriate masked"
1896 " load/store-lanes instruction.\n");
1897 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1898 return;
1899 }
1900 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1901 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1902 return;
1903 }
1904
bfaa08b7
RS
1905 if (memory_access_type == VMAT_GATHER_SCATTER)
1906 {
f307441a
RS
1907 internal_fn ifn = (is_load
1908 ? IFN_MASK_GATHER_LOAD
1909 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1910 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1911 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1912 gs_info->memory_type,
1913 TYPE_SIGN (offset_type),
1914 gs_info->scale))
1915 {
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918 "can't use a fully-masked loop because the"
1919 " target doesn't have an appropriate masked"
f307441a 1920 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1921 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1922 return;
1923 }
1924 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1925 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1926 return;
1927 }
1928
7cfb4d93
RS
1929 if (memory_access_type != VMAT_CONTIGUOUS
1930 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1931 {
1932 /* Element X of the data must come from iteration i * VF + X of the
1933 scalar loop. We need more work to support other mappings. */
1934 if (dump_enabled_p ())
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1936 "can't use a fully-masked loop because an access"
1937 " isn't contiguous.\n");
1938 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1939 return;
1940 }
1941
1942 machine_mode mask_mode;
1943 if (!(targetm.vectorize.get_mask_mode
1944 (GET_MODE_NUNITS (vecmode),
1945 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1946 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1947 {
1948 if (dump_enabled_p ())
1949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1950 "can't use a fully-masked loop because the target"
1951 " doesn't have the appropriate masked load or"
1952 " store.\n");
1953 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1954 return;
1955 }
1956 /* We might load more scalars than we need for permuting SLP loads.
1957 We checked in get_group_load_store_type that the extra elements
1958 don't leak into a new vector. */
1959 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1960 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1961 unsigned int nvectors;
1962 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1963 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1964 else
1965 gcc_unreachable ();
1966}
1967
1968/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1969 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1970 that needs to be applied to all loads and stores in a vectorized loop.
1971 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1972
1973 MASK_TYPE is the type of both masks. If new statements are needed,
1974 insert them before GSI. */
1975
1976static tree
1977prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1978 gimple_stmt_iterator *gsi)
1979{
1980 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1981 if (!loop_mask)
1982 return vec_mask;
1983
1984 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1985 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1986 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1987 vec_mask, loop_mask);
1988 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1989 return and_res;
1990}
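/* For example (illustrative lane values): with an 8-lane mask type,
   LOOP_MASK = { 1,1,1,1,1,0,0,0 } (only five lanes active in the final
   iteration) and VEC_MASK = { 1,0,1,0,1,0,1,0 } (the vectorized scalar
   condition), the statement built above computes
   vec_mask_and = { 1,0,1,0,1,0,0,0 }, so lanes beyond the loop bound can
   never be accessed regardless of the scalar condition.  */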
1991
429ef523
RS
1992/* Determine whether we can use a gather load or scatter store to vectorize
1993 strided load or store STMT by truncating the current offset to a smaller
1994 width. We need to be able to construct an offset vector:
1995
1996 { 0, X, X*2, X*3, ... }
1997
1998 without loss of precision, where X is STMT's DR_STEP.
1999
2000 Return true if this is possible, describing the gather load or scatter
2001 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2002
2003static bool
2004vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
2005 bool masked_p,
2006 gather_scatter_info *gs_info)
2007{
2008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2009 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2010 tree step = DR_STEP (dr);
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 {
2013 /* ??? Perhaps we could use range information here? */
2014 if (dump_enabled_p ())
2015 dump_printf_loc (MSG_NOTE, vect_location,
2016 "cannot truncate variable step.\n");
2017 return false;
2018 }
2019
2020 /* Get the number of bits in an element. */
2021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2022 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2023 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2024
2025 /* Set COUNT to the upper limit on the number of elements - 1.
2026 Start with the maximum vectorization factor. */
2027 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2028
2029 /* Try lowering COUNT to the number of scalar latch iterations. */
2030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2031 widest_int max_iters;
2032 if (max_loop_iterations (loop, &max_iters)
2033 && max_iters < count)
2034 count = max_iters.to_shwi ();
2035
2036 /* Try scales of 1 and the element size. */
2037 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2038 bool overflow_p = false;
2039 for (int i = 0; i < 2; ++i)
2040 {
2041 int scale = scales[i];
2042 widest_int factor;
2043 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2044 continue;
2045
2046 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2047 in OFFSET_BITS bits. */
2048 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
2049 if (overflow_p)
2050 continue;
2051 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2052 if (wi::min_precision (range, sign) > element_bits)
2053 {
2054 overflow_p = true;
2055 continue;
2056 }
2057
2058 /* See whether the target supports the operation. */
2059 tree memory_type = TREE_TYPE (DR_REF (dr));
2060 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2061 memory_type, element_bits, sign, scale,
2062 &gs_info->ifn, &gs_info->element_type))
2063 continue;
2064
2065 tree offset_type = build_nonstandard_integer_type (element_bits,
2066 sign == UNSIGNED);
2067
2068 gs_info->decl = NULL_TREE;
2069 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2070 but we don't need to store that here. */
2071 gs_info->base = NULL_TREE;
2072 gs_info->offset = fold_convert (offset_type, step);
929b4411 2073 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2074 gs_info->offset_vectype = NULL_TREE;
2075 gs_info->scale = scale;
2076 gs_info->memory_type = memory_type;
2077 return true;
2078 }
2079
2080 if (overflow_p && dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "truncating gather/scatter offset to %d bits"
2083 " might change its value.\n", element_bits);
2084
2085 return false;
2086}
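/* Worked example with assumed numbers: suppose DR_STEP is 4 bytes, the
   vector elements are 32 bits wide and COUNT ends up as 999 (the loop runs
   at most 1000 times).  Trying SCALE = 4 gives FACTOR = 4 / 4 = 1, so the
   largest offset is 999 * 1 = 999, which needs only 10 bits and therefore
   fits the 32-bit element.  If the target supports a gather/scatter with
   32-bit offsets and scale 4, the access is described as the offset vector
   { 0, 1, 2, ... } scaled by 4.  */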
2087
ab2fc782
RS
2088/* Return true if we can use gather/scatter internal functions to
2089 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
2090 MASKED_P is true if load or store is conditional. When returning
2091 true, fill in GS_INFO with the information required to perform the
2092 operation. */
ab2fc782
RS
2093
2094static bool
2095vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2096 bool masked_p,
ab2fc782
RS
2097 gather_scatter_info *gs_info)
2098{
2099 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2100 || gs_info->decl)
429ef523
RS
2101 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2102 masked_p, gs_info);
ab2fc782
RS
2103
2104 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2105 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2106 tree offset_type = TREE_TYPE (gs_info->offset);
2107 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2108
2109 /* Enforced by vect_check_gather_scatter. */
2110 gcc_assert (element_bits >= offset_bits);
2111
2112 /* If the elements are wider than the offset, convert the offset to the
2113 same width, without changing its sign. */
2114 if (element_bits > offset_bits)
2115 {
2116 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2117 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2118 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2119 }
2120
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_NOTE, vect_location,
2123 "using gather/scatter for strided/grouped access,"
2124 " scale = %d\n", gs_info->scale);
2125
2126 return true;
2127}
2128
62da9e14
RS
2129/* STMT is a non-strided load or store, meaning that it accesses
2130 elements with a known constant step. Return -1 if that step
2131 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2132
2133static int
2134compare_step_with_zero (gimple *stmt)
2135{
2136 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2137 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2138 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2139 size_zero_node);
62da9e14
RS
2140}
2141
2142/* If the target supports a permute mask that reverses the elements in
2143 a vector of type VECTYPE, return that mask, otherwise return null. */
2144
2145static tree
2146perm_mask_for_reverse (tree vectype)
2147{
928686b1 2148 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2149
d980067b
RS
2150 /* The encoding has a single stepped pattern. */
2151 vec_perm_builder sel (nunits, 1, 3);
928686b1 2152 for (int i = 0; i < 3; ++i)
908a1a16 2153 sel.quick_push (nunits - 1 - i);
62da9e14 2154
e3342de4
RS
2155 vec_perm_indices indices (sel, 1, nunits);
2156 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2157 return NULL_TREE;
e3342de4 2158 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2159}
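/* Concrete instance (assuming an 8-element vector): the three encoded
   elements pushed above are { 7, 6, 5 }.  Since the builder describes a
   single stepped pattern, the series continues with the same step of -1,
   yielding the full reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }; the same
   encoding also covers variable-length vectors.  */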
5ce9450f 2160
c3a8f964
RS
2161/* STMT is either a masked or unconditional store. Return the value
2162 being stored. */
2163
f307441a 2164tree
c3a8f964
RS
2165vect_get_store_rhs (gimple *stmt)
2166{
2167 if (gassign *assign = dyn_cast <gassign *> (stmt))
2168 {
2169 gcc_assert (gimple_assign_single_p (assign));
2170 return gimple_assign_rhs1 (assign);
2171 }
2172 if (gcall *call = dyn_cast <gcall *> (stmt))
2173 {
2174 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2175 int index = internal_fn_stored_value_index (ifn);
2176 gcc_assert (index >= 0);
2177 return gimple_call_arg (stmt, index);
c3a8f964
RS
2178 }
2179 gcc_unreachable ();
2180}
2181
2de001ee
RS
2182/* A subroutine of get_load_store_type, with a subset of the same
2183 arguments. Handle the case where STMT is part of a grouped load
2184 or store.
2185
2186 For stores, the statements in the group are all consecutive
2187 and there is no gap at the end. For loads, the statements in the
2188 group might not be consecutive; there can be gaps between statements
2189 as well as at the end. */
2190
2191static bool
2192get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2193 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2194 vect_memory_access_type *memory_access_type,
2195 gather_scatter_info *gs_info)
2de001ee
RS
2196{
2197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2198 vec_info *vinfo = stmt_info->vinfo;
2199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2200 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2c53b149 2201 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2202 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 2203 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2de001ee 2204 bool single_element_p = (stmt == first_stmt
2c53b149
RB
2205 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2206 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2207 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2208
2209 /* True if the vectorized statements would access beyond the last
2210 statement in the group. */
2211 bool overrun_p = false;
2212
2213 /* True if we can cope with such overrun by peeling for gaps, so that
2214 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2215 bool can_overrun_p = (!masked_p
2216 && vls_type == VLS_LOAD
2217 && loop_vinfo
2218 && !loop->inner);
2de001ee
RS
2219
2220 /* There can only be a gap at the end of the group if the stride is
2221 known at compile time. */
2222 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2223
2224 /* Stores can't yet have gaps. */
2225 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2226
2227 if (slp)
2228 {
2229 if (STMT_VINFO_STRIDED_P (stmt_info))
2230 {
2c53b149 2231 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2232 separated by the stride, until we have a complete vector.
2233 Fall back to scalar accesses if that isn't possible. */
928686b1 2234 if (multiple_p (nunits, group_size))
2de001ee
RS
2235 *memory_access_type = VMAT_STRIDED_SLP;
2236 else
2237 *memory_access_type = VMAT_ELEMENTWISE;
2238 }
2239 else
2240 {
2241 overrun_p = loop_vinfo && gap != 0;
2242 if (overrun_p && vls_type != VLS_LOAD)
2243 {
2244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2245 "Grouped store with gaps requires"
2246 " non-consecutive accesses\n");
2247 return false;
2248 }
f702e7d4
RS
2249 /* An overrun is fine if the trailing elements are smaller
2250 than the alignment boundary B. Every vector access will
2251 be a multiple of B and so we are guaranteed to access a
2252 non-gap element in the same B-sized block. */
f9ef2c76 2253 if (overrun_p
f702e7d4
RS
2254 && gap < (vect_known_alignment_in_bytes (first_dr)
2255 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2256 overrun_p = false;
2de001ee
RS
2257 if (overrun_p && !can_overrun_p)
2258 {
2259 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2261 "Peeling for outer loop is not supported\n");
2262 return false;
2263 }
2264 *memory_access_type = VMAT_CONTIGUOUS;
2265 }
2266 }
2267 else
2268 {
2269 /* We can always handle this case using elementwise accesses,
2270 but see if something more efficient is available. */
2271 *memory_access_type = VMAT_ELEMENTWISE;
2272
2273 /* If there is a gap at the end of the group then these optimizations
2274 would access excess elements in the last iteration. */
2275 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2276 /* An overrun is fine if the trailing elements are smaller than the
2277 alignment boundary B. Every vector access will be a multiple of B
2278 and so we are guaranteed to access a non-gap element in the
2279 same B-sized block. */
f9ef2c76 2280 if (would_overrun_p
7e11fc7f 2281 && !masked_p
f702e7d4
RS
2282 && gap < (vect_known_alignment_in_bytes (first_dr)
2283 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2284 would_overrun_p = false;
f702e7d4 2285
2de001ee 2286 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2287 && (can_overrun_p || !would_overrun_p)
2288 && compare_step_with_zero (stmt) > 0)
2de001ee 2289 {
6737facb
RS
2290 /* First cope with the degenerate case of a single-element
2291 vector. */
2292 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2293 *memory_access_type = VMAT_CONTIGUOUS;
2294
2295 /* Otherwise try using LOAD/STORE_LANES. */
2296 if (*memory_access_type == VMAT_ELEMENTWISE
2297 && (vls_type == VLS_LOAD
7e11fc7f
RS
2298 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2299 : vect_store_lanes_supported (vectype, group_size,
2300 masked_p)))
2de001ee
RS
2301 {
2302 *memory_access_type = VMAT_LOAD_STORE_LANES;
2303 overrun_p = would_overrun_p;
2304 }
2305
2306 /* If that fails, try using permuting loads. */
2307 if (*memory_access_type == VMAT_ELEMENTWISE
2308 && (vls_type == VLS_LOAD
2309 ? vect_grouped_load_supported (vectype, single_element_p,
2310 group_size)
2311 : vect_grouped_store_supported (vectype, group_size)))
2312 {
2313 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2314 overrun_p = would_overrun_p;
2315 }
2316 }
429ef523
RS
2317
2318 /* As a last resort, try using a gather load or scatter store.
2319
2320 ??? Although the code can handle all group sizes correctly,
2321 it probably isn't a win to use separate strided accesses based
2322 on nearby locations. Or, even if it's a win over scalar code,
2323 it might not be a win over vectorizing at a lower VF, if that
2324 allows us to use contiguous accesses. */
2325 if (*memory_access_type == VMAT_ELEMENTWISE
2326 && single_element_p
2327 && loop_vinfo
2328 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2329 masked_p, gs_info))
2330 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2331 }
2332
2333 if (vls_type != VLS_LOAD && first_stmt == stmt)
2334 {
2335 /* STMT is the leader of the group. Check the operands of all the
2336 stmts of the group. */
2c53b149 2337 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2de001ee
RS
2338 while (next_stmt)
2339 {
7e11fc7f 2340 tree op = vect_get_store_rhs (next_stmt);
2de001ee
RS
2341 gimple *def_stmt;
2342 enum vect_def_type dt;
2343 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2344 {
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "use not simple.\n");
2348 return false;
2349 }
2c53b149 2350 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2de001ee
RS
2351 }
2352 }
2353
2354 if (overrun_p)
2355 {
2356 gcc_assert (can_overrun_p);
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "Data access with gaps requires scalar "
2360 "epilogue loop\n");
2361 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2362 }
2363
2364 return true;
2365}
2366
62da9e14
RS
2367/* A subroutine of get_load_store_type, with a subset of the same
2368 arguments. Handle the case where STMT is a load or store that
2369 accesses consecutive elements with a negative step. */
2370
2371static vect_memory_access_type
2372get_negative_load_store_type (gimple *stmt, tree vectype,
2373 vec_load_store_type vls_type,
2374 unsigned int ncopies)
2375{
2376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2377 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2378 dr_alignment_support alignment_support_scheme;
2379
2380 if (ncopies > 1)
2381 {
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384 "multiple types with negative step.\n");
2385 return VMAT_ELEMENTWISE;
2386 }
2387
2388 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2389 if (alignment_support_scheme != dr_aligned
2390 && alignment_support_scheme != dr_unaligned_supported)
2391 {
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "negative step but alignment required.\n");
2395 return VMAT_ELEMENTWISE;
2396 }
2397
2398 if (vls_type == VLS_STORE_INVARIANT)
2399 {
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_NOTE, vect_location,
2402 "negative step with invariant source;"
2403 " no permute needed.\n");
2404 return VMAT_CONTIGUOUS_DOWN;
2405 }
2406
2407 if (!perm_mask_for_reverse (vectype))
2408 {
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "negative step and reversing not supported.\n");
2412 return VMAT_ELEMENTWISE;
2413 }
2414
2415 return VMAT_CONTIGUOUS_REVERSE;
2416}
2417
2de001ee
RS
2418/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2419 if there is a memory access type that the vectorized form can use,
2420 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2421 or scatters, fill in GS_INFO accordingly.
2422
2423 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2424 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2425 VECTYPE is the vector type that the vectorized statements will use.
2426 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2427
2428static bool
7e11fc7f 2429get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2430 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2431 vect_memory_access_type *memory_access_type,
2432 gather_scatter_info *gs_info)
2433{
2434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2435 vec_info *vinfo = stmt_info->vinfo;
2436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2437 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2438 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2439 {
2440 *memory_access_type = VMAT_GATHER_SCATTER;
2441 gimple *def_stmt;
2442 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2443 gcc_unreachable ();
2444 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2445 &gs_info->offset_dt,
2446 &gs_info->offset_vectype))
2447 {
2448 if (dump_enabled_p ())
2449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2450 "%s index use not simple.\n",
2451 vls_type == VLS_LOAD ? "gather" : "scatter");
2452 return false;
2453 }
2454 }
2455 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2456 {
7e11fc7f 2457 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2458 memory_access_type, gs_info))
2de001ee
RS
2459 return false;
2460 }
2461 else if (STMT_VINFO_STRIDED_P (stmt_info))
2462 {
2463 gcc_assert (!slp);
ab2fc782 2464 if (loop_vinfo
429ef523
RS
2465 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2466 masked_p, gs_info))
ab2fc782
RS
2467 *memory_access_type = VMAT_GATHER_SCATTER;
2468 else
2469 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2470 }
2471 else
62da9e14
RS
2472 {
2473 int cmp = compare_step_with_zero (stmt);
2474 if (cmp < 0)
2475 *memory_access_type = get_negative_load_store_type
2476 (stmt, vectype, vls_type, ncopies);
2477 else if (cmp == 0)
2478 {
2479 gcc_assert (vls_type == VLS_LOAD);
2480 *memory_access_type = VMAT_INVARIANT;
2481 }
2482 else
2483 *memory_access_type = VMAT_CONTIGUOUS;
2484 }
2de001ee 2485
4d694b27
RS
2486 if ((*memory_access_type == VMAT_ELEMENTWISE
2487 || *memory_access_type == VMAT_STRIDED_SLP)
2488 && !nunits.is_constant ())
2489 {
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "Not using elementwise accesses due to variable "
2493 "vectorization factor.\n");
2494 return false;
2495 }
2496
2de001ee
RS
2497 /* FIXME: At the moment the cost model seems to underestimate the
2498 cost of using elementwise accesses. This check preserves the
2499 traditional behavior until that can be fixed. */
2500 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2501 && !STMT_VINFO_STRIDED_P (stmt_info)
2c53b149
RB
2502 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2503 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2504 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2505 {
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2508 "not falling back to elementwise accesses\n");
2509 return false;
2510 }
2511 return true;
2512}
2513
aaeefd88 2514/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2515 conditional load or store STMT. When returning true, store the type
2516 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2517 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2518
2519static bool
929b4411
RS
2520vect_check_load_store_mask (gimple *stmt, tree mask,
2521 vect_def_type *mask_dt_out,
2522 tree *mask_vectype_out)
aaeefd88
RS
2523{
2524 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2525 {
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask argument is not a boolean.\n");
2529 return false;
2530 }
2531
2532 if (TREE_CODE (mask) != SSA_NAME)
2533 {
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 "mask argument is not an SSA name.\n");
2537 return false;
2538 }
2539
2540 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2541 gimple *def_stmt;
929b4411 2542 enum vect_def_type mask_dt;
aaeefd88 2543 tree mask_vectype;
929b4411 2544 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
aaeefd88
RS
2545 &mask_vectype))
2546 {
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2549 "mask use not simple.\n");
2550 return false;
2551 }
2552
2553 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2554 if (!mask_vectype)
2555 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2556
2557 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2558 {
2559 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "could not find an appropriate vector mask type.\n");
2562 return false;
2563 }
2564
2565 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2566 TYPE_VECTOR_SUBPARTS (vectype)))
2567 {
2568 if (dump_enabled_p ())
2569 {
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2571 "vector mask type ");
2572 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2573 dump_printf (MSG_MISSED_OPTIMIZATION,
2574 " does not match vector data type ");
2575 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2576 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2577 }
2578 return false;
2579 }
2580
929b4411 2581 *mask_dt_out = mask_dt;
aaeefd88
RS
2582 *mask_vectype_out = mask_vectype;
2583 return true;
2584}
2585
3133c3b6
RS
2586/* Return true if stored value RHS is suitable for vectorizing store
2587 statement STMT. When returning true, store the type of the
929b4411
RS
2588 definition in *RHS_DT_OUT, the type of the vectorized store value in
2589 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2590
2591static bool
929b4411
RS
2592vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2593 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2594{
2595 /* In the case this is a store from a constant make sure
2596 native_encode_expr can handle it. */
2597 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2598 {
2599 if (dump_enabled_p ())
2600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2601 "cannot encode constant as a byte sequence.\n");
2602 return false;
2603 }
2604
2605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2606 gimple *def_stmt;
929b4411 2607 enum vect_def_type rhs_dt;
3133c3b6 2608 tree rhs_vectype;
929b4411 2609 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
3133c3b6
RS
2610 &rhs_vectype))
2611 {
2612 if (dump_enabled_p ())
2613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2614 "use not simple.\n");
2615 return false;
2616 }
2617
2618 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2619 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2620 {
2621 if (dump_enabled_p ())
2622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2623 "incompatible vector types.\n");
2624 return false;
2625 }
2626
929b4411 2627 *rhs_dt_out = rhs_dt;
3133c3b6 2628 *rhs_vectype_out = rhs_vectype;
929b4411 2629 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2630 *vls_type_out = VLS_STORE_INVARIANT;
2631 else
2632 *vls_type_out = VLS_STORE;
2633 return true;
2634}
2635
bc9587eb
RS
2636/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2637 Note that we support masks with floating-point type, in which case the
2638 floats are interpreted as a bitmask. */
2639
2640static tree
2641vect_build_all_ones_mask (gimple *stmt, tree masktype)
2642{
2643 if (TREE_CODE (masktype) == INTEGER_TYPE)
2644 return build_int_cst (masktype, -1);
2645 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2646 {
2647 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2648 mask = build_vector_from_val (masktype, mask);
2649 return vect_init_vector (stmt, mask, masktype, NULL);
2650 }
2651 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2652 {
2653 REAL_VALUE_TYPE r;
2654 long tmp[6];
2655 for (int j = 0; j < 6; ++j)
2656 tmp[j] = -1;
2657 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2658 tree mask = build_real (TREE_TYPE (masktype), r);
2659 mask = build_vector_from_val (masktype, mask);
2660 return vect_init_vector (stmt, mask, masktype, NULL);
2661 }
2662 gcc_unreachable ();
2663}
2664
2665/* Build an all-zero merge value of type VECTYPE while vectorizing
2666 STMT as a gather load. */
2667
2668static tree
2669vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2670{
2671 tree merge;
2672 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2673 merge = build_int_cst (TREE_TYPE (vectype), 0);
2674 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2675 {
2676 REAL_VALUE_TYPE r;
2677 long tmp[6];
2678 for (int j = 0; j < 6; ++j)
2679 tmp[j] = 0;
2680 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2681 merge = build_real (TREE_TYPE (vectype), r);
2682 }
2683 else
2684 gcc_unreachable ();
2685 merge = build_vector_from_val (vectype, merge);
2686 return vect_init_vector (stmt, merge, vectype, NULL);
2687}
2688
c48d2d35
RS
2689/* Build a gather load call while vectorizing STMT. Insert new instructions
2690 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2691 operation. If the load is conditional, MASK is the unvectorized
929b4411 2692 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2693
2694static void
2695vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2696 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2697 tree mask, vect_def_type mask_dt)
c48d2d35
RS
2698{
2699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2701 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2702 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2703 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2704 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2705 edge pe = loop_preheader_edge (loop);
2706 enum { NARROW, NONE, WIDEN } modifier;
2707 poly_uint64 gather_off_nunits
2708 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2709
2710 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2711 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2712 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2713 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2714 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2715 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2716 tree scaletype = TREE_VALUE (arglist);
2717 gcc_checking_assert (types_compatible_p (srctype, rettype)
2718 && (!mask || types_compatible_p (srctype, masktype)));
2719
2720 tree perm_mask = NULL_TREE;
2721 tree mask_perm_mask = NULL_TREE;
2722 if (known_eq (nunits, gather_off_nunits))
2723 modifier = NONE;
2724 else if (known_eq (nunits * 2, gather_off_nunits))
2725 {
2726 modifier = WIDEN;
2727
2728 /* Currently widening gathers and scatters are only supported for
2729 fixed-length vectors. */
2730 int count = gather_off_nunits.to_constant ();
2731 vec_perm_builder sel (count, count, 1);
2732 for (int i = 0; i < count; ++i)
2733 sel.quick_push (i | (count / 2));
2734
2735 vec_perm_indices indices (sel, 1, count);
2736 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2737 indices);
2738 }
2739 else if (known_eq (nunits, gather_off_nunits * 2))
2740 {
2741 modifier = NARROW;
2742
2743 /* Currently narrowing gathers and scatters are only supported for
2744 fixed-length vectors. */
2745 int count = nunits.to_constant ();
2746 vec_perm_builder sel (count, count, 1);
2747 sel.quick_grow (count);
2748 for (int i = 0; i < count; ++i)
2749 sel[i] = i < count / 2 ? i : i + count / 2;
2750 vec_perm_indices indices (sel, 2, count);
2751 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2752
2753 ncopies *= 2;
2754
2755 if (mask)
2756 {
2757 for (int i = 0; i < count; ++i)
2758 sel[i] = i | (count / 2);
2759 indices.new_vector (sel, 2, count);
2760 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2761 }
2762 }
2763 else
2764 gcc_unreachable ();
2765
2766 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2767 vectype);
2768
2769 tree ptr = fold_convert (ptrtype, gs_info->base);
2770 if (!is_gimple_min_invariant (ptr))
2771 {
2772 gimple_seq seq;
2773 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2774 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2775 gcc_assert (!new_bb);
2776 }
2777
2778 tree scale = build_int_cst (scaletype, gs_info->scale);
2779
2780 tree vec_oprnd0 = NULL_TREE;
2781 tree vec_mask = NULL_TREE;
2782 tree src_op = NULL_TREE;
2783 tree mask_op = NULL_TREE;
2784 tree prev_res = NULL_TREE;
2785 stmt_vec_info prev_stmt_info = NULL;
2786
2787 if (!mask)
2788 {
2789 src_op = vect_build_zero_merge_argument (stmt, rettype);
2790 mask_op = vect_build_all_ones_mask (stmt, masktype);
2791 }
2792
2793 for (int j = 0; j < ncopies; ++j)
2794 {
2795 tree op, var;
2796 gimple *new_stmt;
2797 if (modifier == WIDEN && (j & 1))
2798 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2799 perm_mask, stmt, gsi);
2800 else if (j == 0)
2801 op = vec_oprnd0
2802 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2803 else
2804 op = vec_oprnd0
2805 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2806
2807 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2808 {
2809 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2810 TYPE_VECTOR_SUBPARTS (idxtype)));
2811 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2812 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2813 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2815 op = var;
2816 }
2817
2818 if (mask)
2819 {
2820 if (mask_perm_mask && (j & 1))
2821 mask_op = permute_vec_elements (mask_op, mask_op,
2822 mask_perm_mask, stmt, gsi);
2823 else
2824 {
2825 if (j == 0)
2826 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2827 else
929b4411 2828 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2829
2830 mask_op = vec_mask;
2831 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2832 {
2833 gcc_assert
2834 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2835 TYPE_VECTOR_SUBPARTS (masktype)));
2836 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2837 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2838 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2839 mask_op);
2840 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2841 mask_op = var;
2842 }
2843 }
2844 src_op = mask_op;
2845 }
2846
2847 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2848 mask_op, scale);
2849
2850 if (!useless_type_conversion_p (vectype, rettype))
2851 {
2852 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2853 TYPE_VECTOR_SUBPARTS (rettype)));
2854 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2855 gimple_call_set_lhs (new_stmt, op);
2856 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2857 var = make_ssa_name (vec_dest);
2858 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2859 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2860 }
2861 else
2862 {
2863 var = make_ssa_name (vec_dest, new_stmt);
2864 gimple_call_set_lhs (new_stmt, var);
2865 }
2866
2867 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2868
2869 if (modifier == NARROW)
2870 {
2871 if ((j & 1) == 0)
2872 {
2873 prev_res = var;
2874 continue;
2875 }
2876 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2877 new_stmt = SSA_NAME_DEF_STMT (var);
2878 }
2879
2880 if (prev_stmt_info == NULL)
2881 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2882 else
2883 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2884 prev_stmt_info = vinfo_for_stmt (new_stmt);
2885 }
2886}
2887
bfaa08b7
RS
2888/* Prepare the base and offset in GS_INFO for vectorization.
2889 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2890 to the vectorized offset argument for the first copy of STMT. STMT
2891 is the statement described by GS_INFO and LOOP is the containing loop. */
2892
2893static void
2894vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2895 gather_scatter_info *gs_info,
2896 tree *dataref_ptr, tree *vec_offset)
2897{
2898 gimple_seq stmts = NULL;
2899 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2900 if (stmts != NULL)
2901 {
2902 basic_block new_bb;
2903 edge pe = loop_preheader_edge (loop);
2904 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2905 gcc_assert (!new_bb);
2906 }
2907 tree offset_type = TREE_TYPE (gs_info->offset);
2908 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2909 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2910 offset_vectype);
2911}
2912
ab2fc782
RS
2913/* Prepare to implement a grouped or strided load or store using
2914 the gather load or scatter store operation described by GS_INFO.
2915 STMT is the load or store statement.
2916
2917 Set *DATAREF_BUMP to the amount that should be added to the base
2918 address after each copy of the vectorized statement. Set *VEC_OFFSET
2919 to an invariant offset vector in which element I has the value
2920 I * DR_STEP / SCALE. */
2921
2922static void
2923vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2924 gather_scatter_info *gs_info,
2925 tree *dataref_bump, tree *vec_offset)
2926{
2927 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2928 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2929 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2931 gimple_seq stmts;
2932
2933 tree bump = size_binop (MULT_EXPR,
2934 fold_convert (sizetype, DR_STEP (dr)),
2935 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2936 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2937 if (stmts)
2938 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2939
2940 /* The offset given in GS_INFO can have pointer type, so use the element
2941 type of the vector instead. */
2942 tree offset_type = TREE_TYPE (gs_info->offset);
2943 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2944 offset_type = TREE_TYPE (offset_vectype);
2945
2946 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2947 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2948 ssize_int (gs_info->scale));
2949 step = fold_convert (offset_type, step);
2950 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2951
2952 /* Create {0, X, X*2, X*3, ...}. */
2953 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2954 build_zero_cst (offset_type), step);
2955 if (stmts)
2956 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2957}
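/* Worked example with assumed values: for DR_STEP = 8 bytes, a 4-element
   vector type and SCALE = 8, *DATAREF_BUMP is 8 * 4 = 32 bytes per copy,
   X = DR_STEP / SCALE = 1 and *VEC_OFFSET is the invariant series
   { 0, 1, 2, 3 }; the gather/scatter then scales these offsets by 8 to touch
   the elements 0, 8, 16 and 24 bytes past the base address.  */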
2958
2959/* Return the amount that should be added to a vector pointer to move
2960 to the next or previous copy of AGGR_TYPE. DR is the data reference
2961 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2962 vectorization. */
2963
2964static tree
2965vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2966 vect_memory_access_type memory_access_type)
2967{
2968 if (memory_access_type == VMAT_INVARIANT)
2969 return size_zero_node;
2970
2971 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2972 tree step = vect_dr_behavior (dr)->step;
2973 if (tree_int_cst_sgn (step) == -1)
2974 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2975 return iv_step;
2976}
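/* E.g. (illustrative): for a 16-byte AGGR_TYPE the increment is +16 for a
   forward contiguous access, -16 when DR_STEP is negative, and 0 for
   VMAT_INVARIANT, where the same vector value is reused by every copy.  */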

/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */

static bool
vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
                    gimple **vec_stmt, slp_tree slp_node,
                    tree vectype_in, enum vect_def_type *dt,
                    stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  unsigned ncopies;
  unsigned HOST_WIDE_INT nunits, num_bytes;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return false;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
    return false;

  unsigned word_bytes = num_bytes / nunits;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
                         "\n");
      if (! slp_node)
        {
          record_stmt_cost (cost_vec,
                            1, vector_stmt, stmt_info, 0, vect_prologue);
          record_stmt_cost (cost_vec,
                            ncopies, vec_perm, stmt_info, 0, vect_body);
        }
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          tree tem = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       char_vectype, vop));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          tree tem2 = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
                                          tem, tem, bswap_vconst);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          tem = make_ssa_name (vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       vectype, tem2));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
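
/* Worked example (editor's note): for BUILT_IN_BSWAP32 with VECTYPE V4SI,
   CHAR_VECTYPE has 16 elements, WORD_BYTES is 4, and the stepped pattern
   built above expands to the selector
     { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 },
   i.e. a byte reversal within each 32-bit word.  A scalar model of that
   permutation, deliberately kept out of the build:  */
#if 0
static void
example_bswap32_bytes (unsigned char buf[16])
{
  /* Reverse the bytes of each 4-byte word in place.  */
  for (unsigned int w = 0; w < 4; ++w)
    for (unsigned int b = 0; b < 2; ++b)
      {
        unsigned char t = buf[w * 4 + b];
        buf[w * 4 + b] = buf[w * 4 + 3 - b];
        buf[w * 4 + 3 - b] = t;
      }
}
#endif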

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
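
/* Example (editor's note): with VECTYPE_IN V4SI and VECTYPE_OUT V8HI the
   narrowing is a single VEC_PACK_TRUNC_EXPR, so that code is stored in
   *CONVERT_CODE; anything needing an intermediate type (multi_step_cvt
   != 0) is rejected.  vectorizable_call below uses this to emit wide
   internal-function calls and pack pairs of their results.  */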
3119/* Function vectorizable_call.
3120
538dd0b7 3121 Check if GS performs a function call that can be vectorized.
b8698a0f 3122 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3123 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3124 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3125
3126static bool
355fe088 3127vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 3128 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 3129{
538dd0b7 3130 gcall *stmt;
3131 tree vec_dest;
3132 tree scalar_dest;
3133 tree op, type;
3134 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3135 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3136 tree vectype_out, vectype_in;
3137 poly_uint64 nunits_in;
3138 poly_uint64 nunits_out;
ebfd146a 3139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3140 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3141 vec_info *vinfo = stmt_info->vinfo;
81c40241 3142 tree fndecl, new_temp, rhs_type;
355fe088 3143 gimple *def_stmt;
3144 enum vect_def_type dt[3]
3145 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 3146 int ndts = 3;
355fe088 3147 gimple *new_stmt = NULL;
ebfd146a 3148 int ncopies, j;
6e1aa848 3149 vec<tree> vargs = vNULL;
3150 enum { NARROW, NONE, WIDEN } modifier;
3151 size_t i, nargs;
9d5e7640 3152 tree lhs;
ebfd146a 3153
190c2236 3154 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3155 return false;
3156
3157 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3158 && ! vec_stmt)
3159 return false;
3160
3161 /* Is GS a vectorizable call? */
3162 stmt = dyn_cast <gcall *> (gs);
3163 if (!stmt)
3164 return false;
3165
5ce9450f 3166 if (gimple_call_internal_p (stmt)
bfaa08b7 3167 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3168 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3169 /* Handled by vectorizable_load and vectorizable_store. */
3170 return false;
5ce9450f 3171
3172 if (gimple_call_lhs (stmt) == NULL_TREE
3173 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3174 return false;
3175
0136f8f0 3176 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3177
3178 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3179
3180 /* Process function arguments. */
3181 rhs_type = NULL_TREE;
b690cc0f 3182 vectype_in = NULL_TREE;
3183 nargs = gimple_call_num_args (stmt);
3184
 3185 /* Bail out if the function has more than three arguments; we do not have
 3186 interesting builtin functions to vectorize with more than two arguments
 3187 except for fma.  No arguments is also not good. */
3188 if (nargs == 0 || nargs > 3)
3189 return false;
3190
3191 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3192 if (gimple_call_internal_p (stmt)
3193 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3194 {
3195 nargs = 0;
3196 rhs_type = unsigned_type_node;
3197 }
3198
3199 for (i = 0; i < nargs; i++)
3200 {
3201 tree opvectype;
3202
3203 op = gimple_call_arg (stmt, i);
3204
3205 /* We can only handle calls with arguments of the same type. */
3206 if (rhs_type
8533c9d8 3207 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3208 {
73fbfcad 3209 if (dump_enabled_p ())
78c60e3d 3210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3211 "argument types differ.\n");
ebfd146a
IR
3212 return false;
3213 }
b690cc0f
RG
3214 if (!rhs_type)
3215 rhs_type = TREE_TYPE (op);
ebfd146a 3216
81c40241 3217 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 3218 {
73fbfcad 3219 if (dump_enabled_p ())
78c60e3d 3220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3221 "use not simple.\n");
ebfd146a
IR
3222 return false;
3223 }
ebfd146a 3224
b690cc0f
RG
3225 if (!vectype_in)
3226 vectype_in = opvectype;
3227 else if (opvectype
3228 && opvectype != vectype_in)
3229 {
73fbfcad 3230 if (dump_enabled_p ())
78c60e3d 3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3232 "argument vector types differ.\n");
b690cc0f
RG
3233 return false;
3234 }
3235 }
3236 /* If all arguments are external or constant defs use a vector type with
3237 the same size as the output vector type. */
ebfd146a 3238 if (!vectype_in)
b690cc0f 3239 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3240 if (vec_stmt)
3241 gcc_assert (vectype_in);
3242 if (!vectype_in)
3243 {
73fbfcad 3244 if (dump_enabled_p ())
7d8930a0 3245 {
78c60e3d
SS
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "no vectype for scalar type ");
3248 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3249 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3250 }
3251
3252 return false;
3253 }
ebfd146a
IR
3254
3255 /* FORNOW */
b690cc0f
RG
3256 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3257 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3258 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3259 modifier = NARROW;
c7bda0f4 3260 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3261 modifier = NONE;
c7bda0f4 3262 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3263 modifier = WIDEN;
3264 else
3265 return false;
3266
70439f0d
RS
3267 /* We only handle functions that do not read or clobber memory. */
3268 if (gimple_vuse (stmt))
3269 {
3270 if (dump_enabled_p ())
3271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3272 "function reads from or writes to memory.\n");
3273 return false;
3274 }
3275
ebfd146a
IR
3276 /* For now, we only vectorize functions if a target specific builtin
3277 is available. TODO -- in some cases, it might be profitable to
3278 insert the calls for pieces of the vector, in order to be able
3279 to vectorize other operations in the loop. */
70439f0d
RS
3280 fndecl = NULL_TREE;
3281 internal_fn ifn = IFN_LAST;
3282 combined_fn cfn = gimple_call_combined_fn (stmt);
3283 tree callee = gimple_call_fndecl (stmt);
3284
3285 /* First try using an internal function. */
b1b6836e
RS
3286 tree_code convert_code = ERROR_MARK;
3287 if (cfn != CFN_LAST
3288 && (modifier == NONE
3289 || (modifier == NARROW
3290 && simple_integer_narrowing (vectype_out, vectype_in,
3291 &convert_code))))
70439f0d
RS
3292 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3293 vectype_in);
3294
3295 /* If that fails, try asking for a target-specific built-in function. */
3296 if (ifn == IFN_LAST)
3297 {
3298 if (cfn != CFN_LAST)
3299 fndecl = targetm.vectorize.builtin_vectorized_function
3300 (cfn, vectype_out, vectype_in);
7672aa9b 3301 else if (callee)
70439f0d
RS
3302 fndecl = targetm.vectorize.builtin_md_vectorized_function
3303 (callee, vectype_out, vectype_in);
3304 }
3305
3306 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3307 {
70439f0d 3308 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3309 && !slp_node
3310 && loop_vinfo
3311 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3312 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3313 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3314 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3315 {
3316 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3317 { 0, 1, 2, ... vf - 1 } vector. */
3318 gcc_assert (nargs == 0);
3319 }
37b14185
RB
3320 else if (modifier == NONE
3321 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3322 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3323 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3324 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3325 vectype_in, dt, cost_vec);
74bf76ed
JJ
3326 else
3327 {
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3330 "function is not vectorizable.\n");
74bf76ed
JJ
3331 return false;
3332 }
ebfd146a
IR
3333 }
3334
fce57248 3335 if (slp_node)
190c2236 3336 ncopies = 1;
b1b6836e 3337 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3338 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3339 else
e8f142e2 3340 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3341
3342 /* Sanity check: make sure that at least one copy of the vectorized stmt
3343 needs to be generated. */
3344 gcc_assert (ncopies >= 1);
3345
3346 if (!vec_stmt) /* transformation not required. */
3347 {
3348 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 3349 if (dump_enabled_p ())
e645e942
TJ
3350 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3351 "\n");
68435eb2
RB
3352 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3353 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3354 record_stmt_cost (cost_vec, ncopies / 2,
3355 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3356
ebfd146a
IR
3357 return true;
3358 }
3359
67b8dbac 3360 /* Transform. */
ebfd146a 3361
73fbfcad 3362 if (dump_enabled_p ())
e645e942 3363 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3364
3365 /* Handle def. */
3366 scalar_dest = gimple_call_lhs (stmt);
3367 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3368
3369 prev_stmt_info = NULL;
b1b6836e 3370 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3371 {
b1b6836e 3372 tree prev_res = NULL_TREE;
ebfd146a
IR
3373 for (j = 0; j < ncopies; ++j)
3374 {
3375 /* Build argument list for the vectorized call. */
3376 if (j == 0)
9771b263 3377 vargs.create (nargs);
ebfd146a 3378 else
9771b263 3379 vargs.truncate (0);
ebfd146a 3380
190c2236
JJ
3381 if (slp_node)
3382 {
ef062b13 3383 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3384 vec<tree> vec_oprnds0;
190c2236
JJ
3385
3386 for (i = 0; i < nargs; i++)
9771b263 3387 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3388 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3389 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3390
3391 /* Arguments are ready. Create the new vector stmt. */
9771b263 3392 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3393 {
3394 size_t k;
3395 for (k = 0; k < nargs; k++)
3396 {
37b5ec8f 3397 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3398 vargs[k] = vec_oprndsk[i];
190c2236 3399 }
b1b6836e
RS
3400 if (modifier == NARROW)
3401 {
3402 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3403 gcall *call
3404 = gimple_build_call_internal_vec (ifn, vargs);
3405 gimple_call_set_lhs (call, half_res);
3406 gimple_call_set_nothrow (call, true);
3407 new_stmt = call;
b1b6836e
RS
3408 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3409 if ((i & 1) == 0)
3410 {
3411 prev_res = half_res;
3412 continue;
3413 }
3414 new_temp = make_ssa_name (vec_dest);
3415 new_stmt = gimple_build_assign (new_temp, convert_code,
3416 prev_res, half_res);
3417 }
70439f0d 3418 else
b1b6836e 3419 {
a844293d 3420 gcall *call;
b1b6836e 3421 if (ifn != IFN_LAST)
a844293d 3422 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3423 else
a844293d
RS
3424 call = gimple_build_call_vec (fndecl, vargs);
3425 new_temp = make_ssa_name (vec_dest, call);
3426 gimple_call_set_lhs (call, new_temp);
3427 gimple_call_set_nothrow (call, true);
3428 new_stmt = call;
b1b6836e 3429 }
190c2236 3430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3431 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3432 }
3433
3434 for (i = 0; i < nargs; i++)
3435 {
37b5ec8f 3436 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3437 vec_oprndsi.release ();
190c2236 3438 }
190c2236
JJ
3439 continue;
3440 }
3441
ebfd146a
IR
3442 for (i = 0; i < nargs; i++)
3443 {
3444 op = gimple_call_arg (stmt, i);
3445 if (j == 0)
3446 vec_oprnd0
81c40241 3447 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3448 else
63827fb8
IR
3449 {
3450 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3451 vec_oprnd0
3452 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3453 }
ebfd146a 3454
9771b263 3455 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3456 }
3457
74bf76ed
JJ
3458 if (gimple_call_internal_p (stmt)
3459 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3460 {
c7bda0f4 3461 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3462 tree new_var
0e22bb5a 3463 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3464 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3465 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3466 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3467 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3468 }
b1b6836e
RS
3469 else if (modifier == NARROW)
3470 {
3471 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3472 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3473 gimple_call_set_lhs (call, half_res);
3474 gimple_call_set_nothrow (call, true);
3475 new_stmt = call;
b1b6836e
RS
3476 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3477 if ((j & 1) == 0)
3478 {
3479 prev_res = half_res;
3480 continue;
3481 }
3482 new_temp = make_ssa_name (vec_dest);
3483 new_stmt = gimple_build_assign (new_temp, convert_code,
3484 prev_res, half_res);
3485 }
74bf76ed
JJ
3486 else
3487 {
a844293d 3488 gcall *call;
70439f0d 3489 if (ifn != IFN_LAST)
a844293d 3490 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3491 else
a844293d 3492 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3493 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3494 gimple_call_set_lhs (call, new_temp);
3495 gimple_call_set_nothrow (call, true);
3496 new_stmt = call;
74bf76ed 3497 }
ebfd146a
IR
3498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3499
b1b6836e 3500 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3501 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3502 else
3503 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3504
3505 prev_stmt_info = vinfo_for_stmt (new_stmt);
3506 }
b1b6836e
RS
3507 }
3508 else if (modifier == NARROW)
3509 {
ebfd146a
IR
3510 for (j = 0; j < ncopies; ++j)
3511 {
3512 /* Build argument list for the vectorized call. */
3513 if (j == 0)
9771b263 3514 vargs.create (nargs * 2);
ebfd146a 3515 else
9771b263 3516 vargs.truncate (0);
ebfd146a 3517
190c2236
JJ
3518 if (slp_node)
3519 {
ef062b13 3520 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3521 vec<tree> vec_oprnds0;
190c2236
JJ
3522
3523 for (i = 0; i < nargs; i++)
9771b263 3524 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3525 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3526 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3527
3528 /* Arguments are ready. Create the new vector stmt. */
9771b263 3529 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3530 {
3531 size_t k;
9771b263 3532 vargs.truncate (0);
190c2236
JJ
3533 for (k = 0; k < nargs; k++)
3534 {
37b5ec8f 3535 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3536 vargs.quick_push (vec_oprndsk[i]);
3537 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3538 }
a844293d 3539 gcall *call;
70439f0d 3540 if (ifn != IFN_LAST)
a844293d 3541 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3542 else
a844293d
RS
3543 call = gimple_build_call_vec (fndecl, vargs);
3544 new_temp = make_ssa_name (vec_dest, call);
3545 gimple_call_set_lhs (call, new_temp);
3546 gimple_call_set_nothrow (call, true);
3547 new_stmt = call;
190c2236 3548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3549 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3550 }
3551
3552 for (i = 0; i < nargs; i++)
3553 {
37b5ec8f 3554 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3555 vec_oprndsi.release ();
190c2236 3556 }
190c2236
JJ
3557 continue;
3558 }
3559
ebfd146a
IR
3560 for (i = 0; i < nargs; i++)
3561 {
3562 op = gimple_call_arg (stmt, i);
3563 if (j == 0)
3564 {
3565 vec_oprnd0
81c40241 3566 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3567 vec_oprnd1
63827fb8 3568 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3569 }
3570 else
3571 {
336ecb65 3572 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3573 vec_oprnd0
63827fb8 3574 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3575 vec_oprnd1
63827fb8 3576 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3577 }
3578
9771b263
DN
3579 vargs.quick_push (vec_oprnd0);
3580 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3581 }
3582
b1b6836e 3583 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3584 new_temp = make_ssa_name (vec_dest, new_stmt);
3585 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3587
3588 if (j == 0)
3589 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3590 else
3591 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3592
3593 prev_stmt_info = vinfo_for_stmt (new_stmt);
3594 }
3595
3596 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3597 }
b1b6836e
RS
3598 else
3599 /* No current target implements this case. */
3600 return false;
ebfd146a 3601
9771b263 3602 vargs.release ();
ebfd146a 3603
ebfd146a
IR
3604 /* The call in STMT might prevent it from being removed in dce.
3605 We however cannot remove it here, due to the way the ssa name
3606 it defines is mapped to the new definition. So just replace
3607 rhs of the statement with something harmless. */
3608
dd34c087
JJ
3609 if (slp_node)
3610 return true;
3611
ebfd146a 3612 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3613 if (is_pattern_stmt_p (stmt_info))
3614 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3615 else
3616 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3617
9d5e7640 3618 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3619 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3620 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3621 STMT_VINFO_STMT (stmt_info) = new_stmt;
3622 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3623
3624 return true;
3625}
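
/* Example (editor's note): a call to sqrtf in the loop body with vectype
   V4SF is matched as CFN_SQRT; if the target provides IFN_SQRT for V4SF,
   modifier is NONE and each copy becomes one internal-function call.
   When the call result is half the width of its argument, the NARROW
   path above emits two wide calls per output vector and combines them
   with the pack code obtained from simple_integer_narrowing.  */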

struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}

/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
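
/* Example (editor's note, details illustrative): given
     #pragma omp declare simd notinbranch
     int foo (int x);
   and a loop vectorized with vf == 8, vectorizable_simd_clone_call below
   prefers a clone with simdlen 8 over a simdlen-4 clone (smaller badness)
   and replaces the scalar call by ncopies == vf / simdlen calls to the
   clone, each taking one vector argument built from the vectorized X.  */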
3716/* Function vectorizable_simd_clone_call.
3717
3718 Check if STMT performs a function call that can be vectorized
3719 by calling a simd clone of the function.
3720 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3721 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3722 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3723
3724static bool
355fe088 3725vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
3726 gimple **vec_stmt, slp_tree slp_node,
3727 stmt_vector_for_cost *)
0136f8f0
AH
3728{
3729 tree vec_dest;
3730 tree scalar_dest;
3731 tree op, type;
3732 tree vec_oprnd0 = NULL_TREE;
3733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3734 tree vectype;
3735 unsigned int nunits;
3736 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3737 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3738 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3739 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3740 tree fndecl, new_temp;
355fe088
TS
3741 gimple *def_stmt;
3742 gimple *new_stmt = NULL;
0136f8f0 3743 int ncopies, j;
00426f9a 3744 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3745 vec<tree> vargs = vNULL;
3746 size_t i, nargs;
3747 tree lhs, rtype, ratype;
e7a74006 3748 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3749
3750 /* Is STMT a vectorizable call? */
3751 if (!is_gimple_call (stmt))
3752 return false;
3753
3754 fndecl = gimple_call_fndecl (stmt);
3755 if (fndecl == NULL_TREE)
3756 return false;
3757
d52f5295 3758 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3759 if (node == NULL || node->simd_clones == NULL)
3760 return false;
3761
3762 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3763 return false;
3764
66c16fd9
RB
3765 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3766 && ! vec_stmt)
0136f8f0
AH
3767 return false;
3768
3769 if (gimple_call_lhs (stmt)
3770 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3771 return false;
3772
3773 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3774
3775 vectype = STMT_VINFO_VECTYPE (stmt_info);
3776
3777 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3778 return false;
3779
3780 /* FORNOW */
fce57248 3781 if (slp_node)
0136f8f0
AH
3782 return false;
3783
3784 /* Process function arguments. */
3785 nargs = gimple_call_num_args (stmt);
3786
3787 /* Bail out if the function has zero arguments. */
3788 if (nargs == 0)
3789 return false;
3790
00426f9a 3791 arginfo.reserve (nargs, true);
0136f8f0
AH
3792
3793 for (i = 0; i < nargs; i++)
3794 {
3795 simd_call_arg_info thisarginfo;
3796 affine_iv iv;
3797
3798 thisarginfo.linear_step = 0;
3799 thisarginfo.align = 0;
3800 thisarginfo.op = NULL_TREE;
17b658af 3801 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3802
3803 op = gimple_call_arg (stmt, i);
81c40241
RB
3804 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3805 &thisarginfo.vectype)
0136f8f0
AH
3806 || thisarginfo.dt == vect_uninitialized_def)
3807 {
3808 if (dump_enabled_p ())
3809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3810 "use not simple.\n");
0136f8f0
AH
3811 return false;
3812 }
3813
3814 if (thisarginfo.dt == vect_constant_def
3815 || thisarginfo.dt == vect_external_def)
3816 gcc_assert (thisarginfo.vectype == NULL_TREE);
3817 else
3818 gcc_assert (thisarginfo.vectype != NULL_TREE);
3819
6c9e85fb
JJ
3820 /* For linear arguments, the analyze phase should have saved
3821 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3822 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3823 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3824 {
3825 gcc_assert (vec_stmt);
3826 thisarginfo.linear_step
17b658af 3827 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3828 thisarginfo.op
17b658af
JJ
3829 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3830 thisarginfo.simd_lane_linear
3831 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3832 == boolean_true_node);
6c9e85fb
JJ
3833 /* If loop has been peeled for alignment, we need to adjust it. */
3834 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3835 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3836 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3837 {
3838 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3839 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3840 tree opt = TREE_TYPE (thisarginfo.op);
3841 bias = fold_convert (TREE_TYPE (step), bias);
3842 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3843 thisarginfo.op
3844 = fold_build2 (POINTER_TYPE_P (opt)
3845 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3846 thisarginfo.op, bias);
3847 }
3848 }
3849 else if (!vec_stmt
3850 && thisarginfo.dt != vect_constant_def
3851 && thisarginfo.dt != vect_external_def
3852 && loop_vinfo
3853 && TREE_CODE (op) == SSA_NAME
3854 && simple_iv (loop, loop_containing_stmt (stmt), op,
3855 &iv, false)
3856 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3857 {
3858 thisarginfo.linear_step = tree_to_shwi (iv.step);
3859 thisarginfo.op = iv.base;
3860 }
3861 else if ((thisarginfo.dt == vect_constant_def
3862 || thisarginfo.dt == vect_external_def)
3863 && POINTER_TYPE_P (TREE_TYPE (op)))
3864 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3865 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3866 linear too. */
3867 if (POINTER_TYPE_P (TREE_TYPE (op))
3868 && !thisarginfo.linear_step
3869 && !vec_stmt
3870 && thisarginfo.dt != vect_constant_def
3871 && thisarginfo.dt != vect_external_def
3872 && loop_vinfo
3873 && !slp_node
3874 && TREE_CODE (op) == SSA_NAME)
3875 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3876
3877 arginfo.quick_push (thisarginfo);
3878 }
3879
d9f21f6a
RS
3880 unsigned HOST_WIDE_INT vf;
3881 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3882 {
3883 if (dump_enabled_p ())
3884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3885 "not considering SIMD clones; not yet supported"
3886 " for variable-width vectors.\n");
 3887 return false;
3888 }
3889
0136f8f0
AH
3890 unsigned int badness = 0;
3891 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3892 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3893 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3894 else
3895 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3896 n = n->simdclone->next_clone)
3897 {
3898 unsigned int this_badness = 0;
d9f21f6a 3899 if (n->simdclone->simdlen > vf
0136f8f0
AH
3900 || n->simdclone->nargs != nargs)
3901 continue;
d9f21f6a
RS
3902 if (n->simdclone->simdlen < vf)
3903 this_badness += (exact_log2 (vf)
0136f8f0
AH
3904 - exact_log2 (n->simdclone->simdlen)) * 1024;
3905 if (n->simdclone->inbranch)
3906 this_badness += 2048;
3907 int target_badness = targetm.simd_clone.usable (n);
3908 if (target_badness < 0)
3909 continue;
3910 this_badness += target_badness * 512;
3911 /* FORNOW: Have to add code to add the mask argument. */
3912 if (n->simdclone->inbranch)
3913 continue;
3914 for (i = 0; i < nargs; i++)
3915 {
3916 switch (n->simdclone->args[i].arg_type)
3917 {
3918 case SIMD_CLONE_ARG_TYPE_VECTOR:
3919 if (!useless_type_conversion_p
3920 (n->simdclone->args[i].orig_type,
3921 TREE_TYPE (gimple_call_arg (stmt, i))))
3922 i = -1;
3923 else if (arginfo[i].dt == vect_constant_def
3924 || arginfo[i].dt == vect_external_def
3925 || arginfo[i].linear_step)
3926 this_badness += 64;
3927 break;
3928 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3929 if (arginfo[i].dt != vect_constant_def
3930 && arginfo[i].dt != vect_external_def)
3931 i = -1;
3932 break;
3933 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3934 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3935 if (arginfo[i].dt == vect_constant_def
3936 || arginfo[i].dt == vect_external_def
3937 || (arginfo[i].linear_step
3938 != n->simdclone->args[i].linear_step))
3939 i = -1;
3940 break;
3941 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3942 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3943 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3944 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3945 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3946 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3947 /* FORNOW */
3948 i = -1;
3949 break;
3950 case SIMD_CLONE_ARG_TYPE_MASK:
3951 gcc_unreachable ();
3952 }
3953 if (i == (size_t) -1)
3954 break;
3955 if (n->simdclone->args[i].alignment > arginfo[i].align)
3956 {
3957 i = -1;
3958 break;
3959 }
3960 if (arginfo[i].align)
3961 this_badness += (exact_log2 (arginfo[i].align)
3962 - exact_log2 (n->simdclone->args[i].alignment));
3963 }
3964 if (i == (size_t) -1)
3965 continue;
3966 if (bestn == NULL || this_badness < badness)
3967 {
3968 bestn = n;
3969 badness = this_badness;
3970 }
3971 }
3972
3973 if (bestn == NULL)
00426f9a 3974 return false;
0136f8f0
AH
3975
3976 for (i = 0; i < nargs; i++)
3977 if ((arginfo[i].dt == vect_constant_def
3978 || arginfo[i].dt == vect_external_def)
3979 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3980 {
3981 arginfo[i].vectype
3982 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3983 i)));
3984 if (arginfo[i].vectype == NULL
cf1b2ba4 3985 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3986 > bestn->simdclone->simdlen))
00426f9a 3987 return false;
0136f8f0
AH
3988 }
3989
3990 fndecl = bestn->decl;
3991 nunits = bestn->simdclone->simdlen;
d9f21f6a 3992 ncopies = vf / nunits;
0136f8f0
AH
3993
3994 /* If the function isn't const, only allow it in simd loops where user
3995 has asserted that at least nunits consecutive iterations can be
3996 performed using SIMD instructions. */
3997 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3998 && gimple_vuse (stmt))
00426f9a 3999 return false;
0136f8f0
AH
4000
4001 /* Sanity check: make sure that at least one copy of the vectorized stmt
4002 needs to be generated. */
4003 gcc_assert (ncopies >= 1);
4004
4005 if (!vec_stmt) /* transformation not required. */
4006 {
6c9e85fb
JJ
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4008 for (i = 0; i < nargs; i++)
7adb26f2
JJ
4009 if ((bestn->simdclone->args[i].arg_type
4010 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4011 || (bestn->simdclone->args[i].arg_type
4012 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 4013 {
17b658af 4014 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
4015 + 1);
4016 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4017 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4018 ? size_type_node : TREE_TYPE (arginfo[i].op);
4019 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4021 tree sll = arginfo[i].simd_lane_linear
4022 ? boolean_true_node : boolean_false_node;
4023 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4024 }
0136f8f0
AH
4025 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4026 if (dump_enabled_p ())
4027 dump_printf_loc (MSG_NOTE, vect_location,
4028 "=== vectorizable_simd_clone_call ===\n");
68435eb2 4029/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4030 return true;
4031 }
4032
67b8dbac 4033 /* Transform. */
0136f8f0
AH
4034
4035 if (dump_enabled_p ())
4036 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4037
4038 /* Handle def. */
4039 scalar_dest = gimple_call_lhs (stmt);
4040 vec_dest = NULL_TREE;
4041 rtype = NULL_TREE;
4042 ratype = NULL_TREE;
4043 if (scalar_dest)
4044 {
4045 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4046 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4047 if (TREE_CODE (rtype) == ARRAY_TYPE)
4048 {
4049 ratype = rtype;
4050 rtype = TREE_TYPE (ratype);
4051 }
4052 }
4053
4054 prev_stmt_info = NULL;
4055 for (j = 0; j < ncopies; ++j)
4056 {
4057 /* Build argument list for the vectorized call. */
4058 if (j == 0)
4059 vargs.create (nargs);
4060 else
4061 vargs.truncate (0);
4062
4063 for (i = 0; i < nargs; i++)
4064 {
4065 unsigned int k, l, m, o;
4066 tree atype;
4067 op = gimple_call_arg (stmt, i);
4068 switch (bestn->simdclone->args[i].arg_type)
4069 {
4070 case SIMD_CLONE_ARG_TYPE_VECTOR:
4071 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4072 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
4073 for (m = j * o; m < (j + 1) * o; m++)
4074 {
cf1b2ba4
RS
4075 if (simd_clone_subparts (atype)
4076 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4077 {
73a699ae 4078 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4079 k = (simd_clone_subparts (arginfo[i].vectype)
4080 / simd_clone_subparts (atype));
0136f8f0
AH
4081 gcc_assert ((k & (k - 1)) == 0);
4082 if (m == 0)
4083 vec_oprnd0
81c40241 4084 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4085 else
4086 {
4087 vec_oprnd0 = arginfo[i].op;
4088 if ((m & (k - 1)) == 0)
4089 vec_oprnd0
4090 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4091 vec_oprnd0);
4092 }
4093 arginfo[i].op = vec_oprnd0;
4094 vec_oprnd0
4095 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4096 bitsize_int (prec),
0136f8f0
AH
4097 bitsize_int ((m & (k - 1)) * prec));
4098 new_stmt
b731b390 4099 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4100 vec_oprnd0);
4101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4102 vargs.safe_push (gimple_assign_lhs (new_stmt));
4103 }
4104 else
4105 {
cf1b2ba4
RS
4106 k = (simd_clone_subparts (atype)
4107 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4108 gcc_assert ((k & (k - 1)) == 0);
4109 vec<constructor_elt, va_gc> *ctor_elts;
4110 if (k != 1)
4111 vec_alloc (ctor_elts, k);
4112 else
4113 ctor_elts = NULL;
4114 for (l = 0; l < k; l++)
4115 {
4116 if (m == 0 && l == 0)
4117 vec_oprnd0
81c40241 4118 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4119 else
4120 vec_oprnd0
4121 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4122 arginfo[i].op);
4123 arginfo[i].op = vec_oprnd0;
4124 if (k == 1)
4125 break;
4126 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4127 vec_oprnd0);
4128 }
4129 if (k == 1)
4130 vargs.safe_push (vec_oprnd0);
4131 else
4132 {
4133 vec_oprnd0 = build_constructor (atype, ctor_elts);
4134 new_stmt
b731b390 4135 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4136 vec_oprnd0);
4137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4138 vargs.safe_push (gimple_assign_lhs (new_stmt));
4139 }
4140 }
4141 }
4142 break;
4143 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4144 vargs.safe_push (op);
4145 break;
4146 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4147 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4148 if (j == 0)
4149 {
4150 gimple_seq stmts;
4151 arginfo[i].op
4152 = force_gimple_operand (arginfo[i].op, &stmts, true,
4153 NULL_TREE);
4154 if (stmts != NULL)
4155 {
4156 basic_block new_bb;
4157 edge pe = loop_preheader_edge (loop);
4158 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4159 gcc_assert (!new_bb);
4160 }
17b658af
JJ
4161 if (arginfo[i].simd_lane_linear)
4162 {
4163 vargs.safe_push (arginfo[i].op);
4164 break;
4165 }
b731b390 4166 tree phi_res = copy_ssa_name (op);
538dd0b7 4167 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 4168 set_vinfo_for_stmt (new_phi,
310213d4 4169 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
4170 add_phi_arg (new_phi, arginfo[i].op,
4171 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4172 enum tree_code code
4173 = POINTER_TYPE_P (TREE_TYPE (op))
4174 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4175 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4176 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4177 widest_int cst
4178 = wi::mul (bestn->simdclone->args[i].linear_step,
4179 ncopies * nunits);
4180 tree tcst = wide_int_to_tree (type, cst);
b731b390 4181 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
4182 new_stmt
4183 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4184 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4185 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4186 set_vinfo_for_stmt (new_stmt,
310213d4 4187 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
4188 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4189 UNKNOWN_LOCATION);
4190 arginfo[i].op = phi_res;
4191 vargs.safe_push (phi_res);
4192 }
4193 else
4194 {
4195 enum tree_code code
4196 = POINTER_TYPE_P (TREE_TYPE (op))
4197 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4198 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4199 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4200 widest_int cst
4201 = wi::mul (bestn->simdclone->args[i].linear_step,
4202 j * nunits);
4203 tree tcst = wide_int_to_tree (type, cst);
b731b390 4204 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
4205 new_stmt = gimple_build_assign (new_temp, code,
4206 arginfo[i].op, tcst);
0136f8f0
AH
4207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4208 vargs.safe_push (new_temp);
4209 }
4210 break;
7adb26f2
JJ
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4213 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4216 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4217 default:
4218 gcc_unreachable ();
4219 }
4220 }
4221
4222 new_stmt = gimple_build_call_vec (fndecl, vargs);
4223 if (vec_dest)
4224 {
cf1b2ba4 4225 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4226 if (ratype)
b731b390 4227 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4228 else if (simd_clone_subparts (vectype)
4229 == simd_clone_subparts (rtype))
0136f8f0
AH
4230 new_temp = make_ssa_name (vec_dest, new_stmt);
4231 else
4232 new_temp = make_ssa_name (rtype, new_stmt);
4233 gimple_call_set_lhs (new_stmt, new_temp);
4234 }
4235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4236
4237 if (vec_dest)
4238 {
cf1b2ba4 4239 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4240 {
4241 unsigned int k, l;
73a699ae
RS
4242 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4243 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4244 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4245 gcc_assert ((k & (k - 1)) == 0);
4246 for (l = 0; l < k; l++)
4247 {
4248 tree t;
4249 if (ratype)
4250 {
4251 t = build_fold_addr_expr (new_temp);
4252 t = build2 (MEM_REF, vectype, t,
73a699ae 4253 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4254 }
4255 else
4256 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4257 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 4258 new_stmt
b731b390 4259 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
4260 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4261 if (j == 0 && l == 0)
4262 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4263 else
4264 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4265
4266 prev_stmt_info = vinfo_for_stmt (new_stmt);
4267 }
4268
4269 if (ratype)
3ba4ff41 4270 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4271 continue;
4272 }
cf1b2ba4 4273 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4274 {
cf1b2ba4
RS
4275 unsigned int k = (simd_clone_subparts (vectype)
4276 / simd_clone_subparts (rtype));
0136f8f0
AH
4277 gcc_assert ((k & (k - 1)) == 0);
4278 if ((j & (k - 1)) == 0)
4279 vec_alloc (ret_ctor_elts, k);
4280 if (ratype)
4281 {
cf1b2ba4 4282 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4283 for (m = 0; m < o; m++)
4284 {
4285 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4286 size_int (m), NULL_TREE, NULL_TREE);
4287 new_stmt
b731b390 4288 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
4289 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4290 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4291 gimple_assign_lhs (new_stmt));
4292 }
3ba4ff41 4293 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4294 }
4295 else
4296 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4297 if ((j & (k - 1)) != k - 1)
4298 continue;
4299 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4300 new_stmt
b731b390 4301 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
4302 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4303
4304 if ((unsigned) j == k - 1)
4305 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4306 else
4307 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4308
4309 prev_stmt_info = vinfo_for_stmt (new_stmt);
4310 continue;
4311 }
4312 else if (ratype)
4313 {
4314 tree t = build_fold_addr_expr (new_temp);
4315 t = build2 (MEM_REF, vectype, t,
4316 build_int_cst (TREE_TYPE (t), 0));
4317 new_stmt
b731b390 4318 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0 4319 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4320 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4321 }
4322 }
4323
4324 if (j == 0)
4325 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4326 else
4327 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4328
4329 prev_stmt_info = vinfo_for_stmt (new_stmt);
4330 }
4331
4332 vargs.release ();
4333
4334 /* The call in STMT might prevent it from being removed in dce.
4335 We however cannot remove it here, due to the way the ssa name
4336 it defines is mapped to the new definition. So just replace
4337 rhs of the statement with something harmless. */
4338
4339 if (slp_node)
4340 return true;
4341
4342 if (scalar_dest)
4343 {
4344 type = TREE_TYPE (scalar_dest);
4345 if (is_pattern_stmt_p (stmt_info))
4346 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4347 else
4348 lhs = gimple_call_lhs (stmt);
4349 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4350 }
4351 else
4352 new_stmt = gimple_build_nop ();
4353 set_vinfo_for_stmt (new_stmt, stmt_info);
4354 set_vinfo_for_stmt (stmt, NULL);
4355 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4356 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4357 unlink_stmt_vdef (stmt);
4358
4359 return true;
4360}
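
/* Example (editor's note): for
     #pragma omp declare simd linear(p:4) notinbranch
   the P argument has arg_type SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP.
   The code above materializes it as a PHI in the loop header that the
   latch advances by linear_step * ncopies * nunits per iteration of the
   vectorized loop, while copy J of the call receives the PHI value plus
   linear_step * J * nunits.  */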
4361
4362
ebfd146a
IR
4363/* Function vect_gen_widened_results_half
4364
4365 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 4366 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 4367 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
4368 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4369 needs to be created (DECL is a function-decl of a target-builtin).
4370 STMT is the original scalar stmt that we are vectorizing. */
4371
355fe088 4372static gimple *
ebfd146a
IR
4373vect_gen_widened_results_half (enum tree_code code,
4374 tree decl,
4375 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4376 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4377 gimple *stmt)
b8698a0f 4378{
355fe088 4379 gimple *new_stmt;
b8698a0f
L
4380 tree new_temp;
4381
4382 /* Generate half of the widened result: */
4383 if (code == CALL_EXPR)
4384 {
4385 /* Target specific support */
ebfd146a
IR
4386 if (op_type == binary_op)
4387 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4388 else
4389 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4390 new_temp = make_ssa_name (vec_dest, new_stmt);
4391 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4392 }
4393 else
ebfd146a 4394 {
b8698a0f
L
4395 /* Generic support */
4396 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4397 if (op_type != binary_op)
4398 vec_oprnd1 = NULL;
0d0e4a03 4399 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4400 new_temp = make_ssa_name (vec_dest, new_stmt);
4401 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4402 }
ebfd146a
IR
4403 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4404
ebfd146a
IR
4405 return new_stmt;
4406}
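
/* Example (editor's note): for a WIDEN_MULT_EXPR on two V8HI operands
   producing V4SI results, the caller invokes this helper twice, once with
   VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, so each
   call builds one V4SI half of the widened product.  */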
4407
4a00c761
JJ
4408
4409/* Get vectorized definitions for loop-based vectorization. For the first
4410 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4411 scalar operand), and for the rest we get a copy with
4412 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4413 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4414 The vectors are collected into VEC_OPRNDS. */
4415
4416static void
355fe088 4417vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4418 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4419{
4420 tree vec_oprnd;
4421
4422 /* Get first vector operand. */
4423 /* All the vector operands except the very first one (that is scalar oprnd)
4424 are stmt copies. */
4425 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4426 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4427 else
4428 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4429
9771b263 4430 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4431
4432 /* Get second vector operand. */
4433 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4434 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4435
4436 *oprnd = vec_oprnd;
4437
4438 /* For conversion in multiple steps, continue to get operands
4439 recursively. */
4440 if (multi_step_cvt)
4441 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4442}
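
/* Example (editor's note): each recursion level pushes two vector defs,
   so a call with MULTI_STEP_CVT == 1 leaves four defs in VEC_OPRNDS --
   enough inputs for a two-step narrowing such as int -> short -> char.  */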
4443
4444
4445/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4446 For multi-step conversions store the resulting vectors and call the function
4447 recursively. */
4448
4449static void
9771b263 4450vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4451 int multi_step_cvt, gimple *stmt,
9771b263 4452 vec<tree> vec_dsts,
4a00c761
JJ
4453 gimple_stmt_iterator *gsi,
4454 slp_tree slp_node, enum tree_code code,
4455 stmt_vec_info *prev_stmt_info)
4456{
4457 unsigned int i;
4458 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4459 gimple *new_stmt;
4a00c761
JJ
4460 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4461
9771b263 4462 vec_dest = vec_dsts.pop ();
4a00c761 4463
9771b263 4464 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4465 {
4466 /* Create demotion operation. */
9771b263
DN
4467 vop0 = (*vec_oprnds)[i];
4468 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4469 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4470 new_tmp = make_ssa_name (vec_dest, new_stmt);
4471 gimple_assign_set_lhs (new_stmt, new_tmp);
4472 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4473
4474 if (multi_step_cvt)
4475 /* Store the resulting vector for next recursive call. */
9771b263 4476 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4477 else
4478 {
4479 /* This is the last step of the conversion sequence. Store the
4480 vectors in SLP_NODE or in vector info of the scalar statement
4481 (or in STMT_VINFO_RELATED_STMT chain). */
4482 if (slp_node)
9771b263 4483 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4484 else
c689ce1e
RB
4485 {
4486 if (!*prev_stmt_info)
4487 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4488 else
4489 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4490
c689ce1e
RB
4491 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4492 }
4a00c761
JJ
4493 }
4494 }
4495
4496 /* For multi-step demotion operations we first generate demotion operations
4497 from the source type to the intermediate types, and then combine the
4498 results (stored in VEC_OPRNDS) in demotion operation to the destination
4499 type. */
4500 if (multi_step_cvt)
4501 {
4502 /* At each level of recursion we have half of the operands we had at the
4503 previous level. */
9771b263 4504 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4505 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4506 stmt, vec_dsts, gsi, slp_node,
4507 VEC_PACK_TRUNC_EXPR,
4508 prev_stmt_info);
4509 }
4510
9771b263 4511 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4512}
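
/* Example (editor's note): narrowing V4SI operands down to V16QI takes
   two steps.  The first invocation packs pairs of V4SI vectors into V8HI
   using the caller-supplied CODE and stores the intermediates back into
   VEC_OPRNDS; the recursive call then packs pairs of V8HI vectors into
   V16QI with VEC_PACK_TRUNC_EXPR, and that final result is what reaches
   SLP_NODE or the STMT_VINFO_RELATED_STMT chain.  */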
4513
4514
4515/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4516 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4517 the resulting vectors and call the function recursively. */
4518
4519static void
9771b263
DN
4520vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4521 vec<tree> *vec_oprnds1,
355fe088 4522 gimple *stmt, tree vec_dest,
4a00c761
JJ
4523 gimple_stmt_iterator *gsi,
4524 enum tree_code code1,
4525 enum tree_code code2, tree decl1,
4526 tree decl2, int op_type)
4527{
4528 int i;
4529 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4530 gimple *new_stmt1, *new_stmt2;
6e1aa848 4531 vec<tree> vec_tmp = vNULL;
4a00c761 4532
9771b263
DN
4533 vec_tmp.create (vec_oprnds0->length () * 2);
4534 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4535 {
4536 if (op_type == binary_op)
9771b263 4537 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4538 else
4539 vop1 = NULL_TREE;
4540
4541 /* Generate the two halves of promotion operation. */
4542 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4543 op_type, vec_dest, gsi, stmt);
4544 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4545 op_type, vec_dest, gsi, stmt);
4546 if (is_gimple_call (new_stmt1))
4547 {
4548 new_tmp1 = gimple_call_lhs (new_stmt1);
4549 new_tmp2 = gimple_call_lhs (new_stmt2);
4550 }
4551 else
4552 {
4553 new_tmp1 = gimple_assign_lhs (new_stmt1);
4554 new_tmp2 = gimple_assign_lhs (new_stmt2);
4555 }
4556
4557 /* Store the results for the next step. */
9771b263
DN
4558 vec_tmp.quick_push (new_tmp1);
4559 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4560 }
4561
689eaba3 4562 vec_oprnds0->release ();
4a00c761
JJ
4563 *vec_oprnds0 = vec_tmp;
4564}
4565
4566
b8698a0f
L
4567/* Check if STMT performs a conversion operation, that can be vectorized.
4568 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4569 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4570 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4571
4572static bool
355fe088 4573vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
4574 gimple **vec_stmt, slp_tree slp_node,
4575 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4576{
4577 tree vec_dest;
4578 tree scalar_dest;
4a00c761 4579 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4580 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4581 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4582 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4583 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4584 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4585 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4586 tree new_temp;
355fe088 4587 gimple *def_stmt;
ebfd146a 4588 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4589 int ndts = 2;
355fe088 4590 gimple *new_stmt = NULL;
ebfd146a 4591 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4592 poly_uint64 nunits_in;
4593 poly_uint64 nunits_out;
ebfd146a 4594 tree vectype_out, vectype_in;
4a00c761
JJ
4595 int ncopies, i, j;
4596 tree lhs_type, rhs_type;
ebfd146a 4597 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4598 vec<tree> vec_oprnds0 = vNULL;
4599 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4600 tree vop0;
4a00c761 4601 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4602 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4603 int multi_step_cvt = 0;
6e1aa848 4604 vec<tree> interm_types = vNULL;
4a00c761
JJ
4605 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4606 int op_type;
4a00c761 4607 unsigned short fltsz;
ebfd146a
IR
4608
4609 /* Is STMT a vectorizable conversion? */
4610
4a00c761 4611 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4612 return false;
4613
66c16fd9
RB
4614 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4615 && ! vec_stmt)
ebfd146a
IR
4616 return false;
4617
4618 if (!is_gimple_assign (stmt))
4619 return false;
4620
4621 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4622 return false;
4623
4624 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4625 if (!CONVERT_EXPR_CODE_P (code)
4626 && code != FIX_TRUNC_EXPR
4627 && code != FLOAT_EXPR
4628 && code != WIDEN_MULT_EXPR
4629 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4630 return false;
4631
4a00c761
JJ
4632 op_type = TREE_CODE_LENGTH (code);
4633
ebfd146a 4634 /* Check types of lhs and rhs. */
b690cc0f 4635 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4636 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4637 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4638
ebfd146a
IR
4639 op0 = gimple_assign_rhs1 (stmt);
4640 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4641
4642 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4643 && !((INTEGRAL_TYPE_P (lhs_type)
4644 && INTEGRAL_TYPE_P (rhs_type))
4645 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4646 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4647 return false;
4648
e6f5c25d
IE
4649 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4650 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4651 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4652 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4653 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4654 {
73fbfcad 4655 if (dump_enabled_p ())
78c60e3d 4656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4657 "type conversion to/from bit-precision unsupported."
4658 "\n");
4a00c761
JJ
4659 return false;
4660 }
4661
b690cc0f 4662 /* Check the operands of the operation. */
81c40241 4663 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4664 {
73fbfcad 4665 if (dump_enabled_p ())
78c60e3d 4666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4667 "use not simple.\n");
b690cc0f
RG
4668 return false;
4669 }
4a00c761
JJ
4670 if (op_type == binary_op)
4671 {
4672 bool ok;
4673
4674 op1 = gimple_assign_rhs2 (stmt);
4675 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4676 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4677 OP1. */
4678 if (CONSTANT_CLASS_P (op0))
81c40241 4679 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4680 else
81c40241 4681 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4682
4683 if (!ok)
4684 {
73fbfcad 4685 if (dump_enabled_p ())
78c60e3d 4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4687 "use not simple.\n");
4a00c761
JJ
4688 return false;
4689 }
4690 }
4691
b690cc0f
RG
4692 /* If op0 is an external or constant defs use a vector type of
4693 the same size as the output vector type. */
ebfd146a 4694 if (!vectype_in)
b690cc0f 4695 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4696 if (vec_stmt)
4697 gcc_assert (vectype_in);
4698 if (!vectype_in)
4699 {
73fbfcad 4700 if (dump_enabled_p ())
4a00c761 4701 {
78c60e3d
SS
4702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4703 "no vectype for scalar type ");
4704 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4705 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4706 }
7d8930a0
IR
4707
4708 return false;
4709 }
ebfd146a 4710
e6f5c25d
IE
4711 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4712 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4713 {
4714 if (dump_enabled_p ())
4715 {
4716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4717 "can't convert between boolean and non "
4718 "boolean vectors");
4719 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4720 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4721 }
4722
4723 return false;
4724 }
4725
b690cc0f
RG
4726 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4727 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4728 if (known_eq (nunits_out, nunits_in))
ebfd146a 4729 modifier = NONE;
062d5ccc
RS
4730 else if (multiple_p (nunits_out, nunits_in))
4731 modifier = NARROW;
ebfd146a 4732 else
062d5ccc
RS
4733 {
4734 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4735 modifier = WIDEN;
4736 }
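  /* An illustrative note added for exposition (the concrete types below are
     only an example, assuming 128-bit vectors): a FLOAT_EXPR from int to
     double has vectype_in = V4SI (nunits_in = 4) and vectype_out = V2DF
     (nunits_out = 2), so nunits_in is a multiple of nunits_out and the
     modifier is WIDEN; the reverse FIX_TRUNC_EXPR gives NARROW, and a
     same-width conversion such as int <-> float gives NONE.  */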
ebfd146a 4737
ff802fa1
IR
4738 /* Multiple types in SLP are handled by creating the appropriate number of
4739 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4740 case of SLP. */
fce57248 4741 if (slp_node)
ebfd146a 4742 ncopies = 1;
4a00c761 4743 else if (modifier == NARROW)
e8f142e2 4744 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4745 else
e8f142e2 4746 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4747
ebfd146a
IR
4748 /* Sanity check: make sure that at least one copy of the vectorized stmt
4749 needs to be generated. */
4750 gcc_assert (ncopies >= 1);
4751
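  /* A worked example (added for illustration; the numbers are hypothetical):
     with a vectorization factor of 8 and 4-lane vectors, vect_get_num_copies
     returns 8 / 4 = 2, so two copies of each vector stmt are emitted.  For
     NARROW the count is taken from vectype_out, since each narrowed result
     vector consumes more than one input vector.  */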
16d22000
RS
4752 bool found_mode = false;
4753 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4754 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4755 opt_scalar_mode rhs_mode_iter;
b397965c 4756
ebfd146a 4757 /* Supportable by target? */
4a00c761 4758 switch (modifier)
ebfd146a 4759 {
4a00c761
JJ
4760 case NONE:
4761 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4762 return false;
4763 if (supportable_convert_operation (code, vectype_out, vectype_in,
4764 &decl1, &code1))
4765 break;
4766 /* FALLTHRU */
4767 unsupported:
73fbfcad 4768 if (dump_enabled_p ())
78c60e3d 4769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4770 "conversion not supported by target.\n");
ebfd146a 4771 return false;
ebfd146a 4772
4a00c761
JJ
4773 case WIDEN:
4774 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4775 &code1, &code2, &multi_step_cvt,
4776 &interm_types))
4a00c761
JJ
4777 {
4778 /* Binary widening operation can only be supported directly by the
4779 architecture. */
4780 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4781 break;
4782 }
4783
4784 if (code != FLOAT_EXPR
b397965c 4785 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4786 goto unsupported;
4787
b397965c 4788 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4789 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4790 {
16d22000 4791 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4792 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4793 break;
4794
4a00c761
JJ
4795 cvt_type
4796 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4797 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4798 if (cvt_type == NULL_TREE)
4799 goto unsupported;
4800
4801 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4802 {
4803 if (!supportable_convert_operation (code, vectype_out,
4804 cvt_type, &decl1, &codecvt1))
4805 goto unsupported;
4806 }
4807 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4808 cvt_type, &codecvt1,
4809 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4810 &interm_types))
4811 continue;
4812 else
4813 gcc_assert (multi_step_cvt == 0);
4814
4815 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4816 vectype_in, &code1, &code2,
4817 &multi_step_cvt, &interm_types))
16d22000
RS
4818 {
4819 found_mode = true;
4820 break;
4821 }
4a00c761
JJ
4822 }
4823
16d22000 4824 if (!found_mode)
4a00c761
JJ
4825 goto unsupported;
4826
4827 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4828 codecvt2 = ERROR_MARK;
4829 else
4830 {
4831 multi_step_cvt++;
9771b263 4832 interm_types.safe_push (cvt_type);
4a00c761
JJ
4833 cvt_type = NULL_TREE;
4834 }
4835 break;
4836
4837 case NARROW:
4838 gcc_assert (op_type == unary_op);
4839 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4840 &code1, &multi_step_cvt,
4841 &interm_types))
4842 break;
4843
4844 if (code != FIX_TRUNC_EXPR
b397965c 4845 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4846 goto unsupported;
4847
4a00c761
JJ
4848 cvt_type
4849 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4850 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4851 if (cvt_type == NULL_TREE)
4852 goto unsupported;
4853 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4854 &decl1, &codecvt1))
4855 goto unsupported;
4856 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4857 &code1, &multi_step_cvt,
4858 &interm_types))
4859 break;
4860 goto unsupported;
4861
4862 default:
4863 gcc_unreachable ();
ebfd146a
IR
4864 }
4865
4866 if (!vec_stmt) /* transformation not required. */
4867 {
73fbfcad 4868 if (dump_enabled_p ())
78c60e3d 4869 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4870 "=== vectorizable_conversion ===\n");
4a00c761 4871 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4872 {
4873 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4874 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4875 cost_vec);
8bd37302 4876 }
4a00c761
JJ
4877 else if (modifier == NARROW)
4878 {
4879 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4880 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4881 cost_vec);
4a00c761
JJ
4882 }
4883 else
4884 {
4885 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4886 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4887 cost_vec);
4a00c761 4888 }
9771b263 4889 interm_types.release ();
ebfd146a
IR
4890 return true;
4891 }
4892
67b8dbac 4893 /* Transform. */
73fbfcad 4894 if (dump_enabled_p ())
78c60e3d 4895 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4896 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4897
4a00c761
JJ
4898 if (op_type == binary_op)
4899 {
4900 if (CONSTANT_CLASS_P (op0))
4901 op0 = fold_convert (TREE_TYPE (op1), op0);
4902 else if (CONSTANT_CLASS_P (op1))
4903 op1 = fold_convert (TREE_TYPE (op0), op1);
4904 }
4905
4906 /* In case of multi-step conversion, we first generate conversion operations
 4907      to the intermediate types, and then from those types to the final one.
4908 We create vector destinations for the intermediate type (TYPES) received
4909 from supportable_*_operation, and store them in the correct order
4910 for future use in vect_create_vectorized_*_stmts (). */
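  /* For example (an added sketch, not part of the original comment):
     promoting a V16QI char source to V4SI int results typically goes through
     an intermediate V8HI type; interm_types then holds that intermediate
     vectype, multi_step_cvt is 1, and vec_dsts stores the final destination
     first so the promotion loop below can walk from the innermost
     intermediate type outwards.  */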
8c681247 4911 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4912 vec_dest = vect_create_destination_var (scalar_dest,
4913 (cvt_type && modifier == WIDEN)
4914 ? cvt_type : vectype_out);
9771b263 4915 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4916
4917 if (multi_step_cvt)
4918 {
9771b263
DN
4919 for (i = interm_types.length () - 1;
4920 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4921 {
4922 vec_dest = vect_create_destination_var (scalar_dest,
4923 intermediate_type);
9771b263 4924 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4925 }
4926 }
ebfd146a 4927
4a00c761 4928 if (cvt_type)
82294ec1
JJ
4929 vec_dest = vect_create_destination_var (scalar_dest,
4930 modifier == WIDEN
4931 ? vectype_out : cvt_type);
4a00c761
JJ
4932
4933 if (!slp_node)
4934 {
30862efc 4935 if (modifier == WIDEN)
4a00c761 4936 {
c3284718 4937 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4938 if (op_type == binary_op)
9771b263 4939 vec_oprnds1.create (1);
4a00c761 4940 }
30862efc 4941 else if (modifier == NARROW)
9771b263
DN
4942 vec_oprnds0.create (
4943 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4944 }
4945 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4946 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4947
4a00c761 4948 last_oprnd = op0;
ebfd146a
IR
4949 prev_stmt_info = NULL;
4950 switch (modifier)
4951 {
4952 case NONE:
4953 for (j = 0; j < ncopies; j++)
4954 {
ebfd146a 4955 if (j == 0)
306b0c92 4956 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4957 else
4958 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4959
9771b263 4960 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4961 {
4962 /* Arguments are ready, create the new vector stmt. */
4963 if (code1 == CALL_EXPR)
4964 {
4965 new_stmt = gimple_build_call (decl1, 1, vop0);
4966 new_temp = make_ssa_name (vec_dest, new_stmt);
4967 gimple_call_set_lhs (new_stmt, new_temp);
4968 }
4969 else
4970 {
4971 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4972 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4973 new_temp = make_ssa_name (vec_dest, new_stmt);
4974 gimple_assign_set_lhs (new_stmt, new_temp);
4975 }
4976
4977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4978 if (slp_node)
9771b263 4979 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4980 else
4981 {
4982 if (!prev_stmt_info)
4983 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4984 else
4985 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4986 prev_stmt_info = vinfo_for_stmt (new_stmt);
4987 }
4a00c761 4988 }
ebfd146a
IR
4989 }
4990 break;
4991
4992 case WIDEN:
4993 /* In case the vectorization factor (VF) is bigger than the number
4994 of elements that we can fit in a vectype (nunits), we have to
 4995        generate more than one vector stmt - i.e., we need to "unroll"
4996 the vector stmt by a factor VF/nunits. */
4997 for (j = 0; j < ncopies; j++)
4998 {
4a00c761 4999 /* Handle uses. */
ebfd146a 5000 if (j == 0)
4a00c761
JJ
5001 {
5002 if (slp_node)
5003 {
5004 if (code == WIDEN_LSHIFT_EXPR)
5005 {
5006 unsigned int k;
ebfd146a 5007
4a00c761
JJ
5008 vec_oprnd1 = op1;
5009 /* Store vec_oprnd1 for every vector stmt to be created
5010 for SLP_NODE. We check during the analysis that all
5011 the shift arguments are the same. */
5012 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5013 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5014
5015 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5016 slp_node);
4a00c761
JJ
5017 }
5018 else
5019 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 5020 &vec_oprnds1, slp_node);
4a00c761
JJ
5021 }
5022 else
5023 {
81c40241 5024 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5025 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5026 if (op_type == binary_op)
5027 {
5028 if (code == WIDEN_LSHIFT_EXPR)
5029 vec_oprnd1 = op1;
5030 else
81c40241 5031 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5032 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5033 }
5034 }
5035 }
ebfd146a 5036 else
4a00c761
JJ
5037 {
5038 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5039 vec_oprnds0.truncate (0);
5040 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5041 if (op_type == binary_op)
5042 {
5043 if (code == WIDEN_LSHIFT_EXPR)
5044 vec_oprnd1 = op1;
5045 else
5046 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5047 vec_oprnd1);
9771b263
DN
5048 vec_oprnds1.truncate (0);
5049 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5050 }
5051 }
ebfd146a 5052
4a00c761
JJ
5053 /* Arguments are ready. Create the new vector stmts. */
5054 for (i = multi_step_cvt; i >= 0; i--)
5055 {
9771b263 5056 tree this_dest = vec_dsts[i];
4a00c761
JJ
5057 enum tree_code c1 = code1, c2 = code2;
5058 if (i == 0 && codecvt2 != ERROR_MARK)
5059 {
5060 c1 = codecvt1;
5061 c2 = codecvt2;
5062 }
5063 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5064 &vec_oprnds1,
5065 stmt, this_dest, gsi,
5066 c1, c2, decl1, decl2,
5067 op_type);
5068 }
5069
9771b263 5070 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5071 {
5072 if (cvt_type)
5073 {
5074 if (codecvt1 == CALL_EXPR)
5075 {
5076 new_stmt = gimple_build_call (decl1, 1, vop0);
5077 new_temp = make_ssa_name (vec_dest, new_stmt);
5078 gimple_call_set_lhs (new_stmt, new_temp);
5079 }
5080 else
5081 {
5082 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5083 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5084 new_stmt = gimple_build_assign (new_temp, codecvt1,
5085 vop0);
4a00c761
JJ
5086 }
5087
5088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5089 }
5090 else
5091 new_stmt = SSA_NAME_DEF_STMT (vop0);
5092
5093 if (slp_node)
9771b263 5094 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 5095 else
c689ce1e
RB
5096 {
5097 if (!prev_stmt_info)
5098 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5099 else
5100 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5101 prev_stmt_info = vinfo_for_stmt (new_stmt);
5102 }
4a00c761 5103 }
ebfd146a 5104 }
4a00c761
JJ
5105
5106 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5107 break;
5108
5109 case NARROW:
5110 /* In case the vectorization factor (VF) is bigger than the number
5111 of elements that we can fit in a vectype (nunits), we have to
 5112        generate more than one vector stmt - i.e., we need to "unroll"
5113 the vector stmt by a factor VF/nunits. */
5114 for (j = 0; j < ncopies; j++)
5115 {
5116 /* Handle uses. */
4a00c761
JJ
5117 if (slp_node)
5118 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5119 slp_node);
ebfd146a
IR
5120 else
5121 {
9771b263 5122 vec_oprnds0.truncate (0);
4a00c761
JJ
5123 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5124 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5125 }
5126
4a00c761
JJ
5127 /* Arguments are ready. Create the new vector stmts. */
5128 if (cvt_type)
9771b263 5129 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5130 {
5131 if (codecvt1 == CALL_EXPR)
5132 {
5133 new_stmt = gimple_build_call (decl1, 1, vop0);
5134 new_temp = make_ssa_name (vec_dest, new_stmt);
5135 gimple_call_set_lhs (new_stmt, new_temp);
5136 }
5137 else
5138 {
5139 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5140 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5141 new_stmt = gimple_build_assign (new_temp, codecvt1,
5142 vop0);
4a00c761 5143 }
ebfd146a 5144
4a00c761 5145 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 5146 vec_oprnds0[i] = new_temp;
4a00c761 5147 }
ebfd146a 5148
4a00c761
JJ
5149 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5150 stmt, vec_dsts, gsi,
5151 slp_node, code1,
5152 &prev_stmt_info);
ebfd146a
IR
5153 }
5154
5155 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5156 break;
ebfd146a
IR
5157 }
5158
9771b263
DN
5159 vec_oprnds0.release ();
5160 vec_oprnds1.release ();
9771b263 5161 interm_types.release ();
ebfd146a
IR
5162
5163 return true;
5164}
ff802fa1
IR
5165
5166
ebfd146a
IR
5167/* Function vectorizable_assignment.
5168
b8698a0f
L
5169 Check if STMT performs an assignment (copy) that can be vectorized.
5170 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5171 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5172 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
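/* For illustration (an added sketch): a statement such as
     x_1 = y_2;
   or a bit-preserving conversion such as
     x_1 = VIEW_CONVERT_EXPR<int>(f_2);
   is handled here as a plain vector copy, possibly wrapped in a
   VIEW_CONVERT_EXPR to the destination vector type.  */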
5173
5174static bool
355fe088 5175vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5176 gimple **vec_stmt, slp_tree slp_node,
5177 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5178{
5179 tree vec_dest;
5180 tree scalar_dest;
5181 tree op;
5182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5183 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5184 tree new_temp;
355fe088 5185 gimple *def_stmt;
4fc5ebf1
JG
5186 enum vect_def_type dt[1] = {vect_unknown_def_type};
5187 int ndts = 1;
ebfd146a 5188 int ncopies;
f18b55bd 5189 int i, j;
6e1aa848 5190 vec<tree> vec_oprnds = vNULL;
ebfd146a 5191 tree vop;
a70d6342 5192 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5193 vec_info *vinfo = stmt_info->vinfo;
355fe088 5194 gimple *new_stmt = NULL;
f18b55bd 5195 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5196 enum tree_code code;
5197 tree vectype_in;
ebfd146a 5198
a70d6342 5199 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5200 return false;
5201
66c16fd9
RB
5202 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5203 && ! vec_stmt)
ebfd146a
IR
5204 return false;
5205
5206 /* Is vectorizable assignment? */
5207 if (!is_gimple_assign (stmt))
5208 return false;
5209
5210 scalar_dest = gimple_assign_lhs (stmt);
5211 if (TREE_CODE (scalar_dest) != SSA_NAME)
5212 return false;
5213
fde9c428 5214 code = gimple_assign_rhs_code (stmt);
ebfd146a 5215 if (gimple_assign_single_p (stmt)
fde9c428
RG
5216 || code == PAREN_EXPR
5217 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5218 op = gimple_assign_rhs1 (stmt);
5219 else
5220 return false;
5221
7b7ec6c5
RG
5222 if (code == VIEW_CONVERT_EXPR)
5223 op = TREE_OPERAND (op, 0);
5224
465c8c19 5225 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5226 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5227
5228 /* Multiple types in SLP are handled by creating the appropriate number of
5229 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5230 case of SLP. */
fce57248 5231 if (slp_node)
465c8c19
JJ
5232 ncopies = 1;
5233 else
e8f142e2 5234 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5235
5236 gcc_assert (ncopies >= 1);
5237
81c40241 5238 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 5239 {
73fbfcad 5240 if (dump_enabled_p ())
78c60e3d 5241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5242 "use not simple.\n");
ebfd146a
IR
5243 return false;
5244 }
5245
fde9c428
RG
5246 /* We can handle NOP_EXPR conversions that do not change the number
5247 of elements or the vector size. */
7b7ec6c5
RG
5248 if ((CONVERT_EXPR_CODE_P (code)
5249 || code == VIEW_CONVERT_EXPR)
fde9c428 5250 && (!vectype_in
928686b1 5251 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5252 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5253 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5254 return false;
5255
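  /* For example (an added note): a conversion between int and unsigned int
     keeps the number of lanes and the vector size, so it is treated as a
     simple copy here; a conversion such as int -> long changes the element
     size and is rejected, leaving it to vectorizable_conversion.  */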
7b7b1813
RG
5256 /* We do not handle bit-precision changes. */
5257 if ((CONVERT_EXPR_CODE_P (code)
5258 || code == VIEW_CONVERT_EXPR)
5259 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5260 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5261 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5262 /* But a conversion that does not change the bit-pattern is ok. */
5263 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5264 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5265 && TYPE_UNSIGNED (TREE_TYPE (op)))
5266 /* Conversion between boolean types of different sizes is
 5267 	    a simple assignment in case their vectypes are the same
5268 boolean vectors. */
5269 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5270 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5271 {
73fbfcad 5272 if (dump_enabled_p ())
78c60e3d
SS
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5274 "type conversion to/from bit-precision "
e645e942 5275 "unsupported.\n");
7b7b1813
RG
5276 return false;
5277 }
5278
ebfd146a
IR
5279 if (!vec_stmt) /* transformation not required. */
5280 {
5281 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 5282 if (dump_enabled_p ())
78c60e3d 5283 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5284 "=== vectorizable_assignment ===\n");
68435eb2 5285 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5286 return true;
5287 }
5288
67b8dbac 5289 /* Transform. */
73fbfcad 5290 if (dump_enabled_p ())
e645e942 5291 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5292
5293 /* Handle def. */
5294 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5295
5296 /* Handle use. */
f18b55bd 5297 for (j = 0; j < ncopies; j++)
ebfd146a 5298 {
f18b55bd
IR
5299 /* Handle uses. */
5300 if (j == 0)
306b0c92 5301 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5302 else
5303 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5304
 5305       /* Arguments are ready.  Create the new vector stmt.  */
9771b263 5306 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5307 {
7b7ec6c5
RG
5308 if (CONVERT_EXPR_CODE_P (code)
5309 || code == VIEW_CONVERT_EXPR)
4a73490d 5310 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
5311 new_stmt = gimple_build_assign (vec_dest, vop);
5312 new_temp = make_ssa_name (vec_dest, new_stmt);
5313 gimple_assign_set_lhs (new_stmt, new_temp);
5314 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5315 if (slp_node)
9771b263 5316 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 5317 }
ebfd146a
IR
5318
5319 if (slp_node)
f18b55bd
IR
5320 continue;
5321
5322 if (j == 0)
5323 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5324 else
5325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5326
5327 prev_stmt_info = vinfo_for_stmt (new_stmt);
5328 }
b8698a0f 5329
9771b263 5330 vec_oprnds.release ();
ebfd146a
IR
5331 return true;
5332}
5333
9dc3f7de 5334
1107f3ae
IR
5335/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5336 either as shift by a scalar or by a vector. */
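/* A hypothetical usage sketch (added for illustration; the surrounding names
   are not from this file): a pattern recognizer that wants to rewrite a
   multiplication by a power of two into a shift could first check

     if (vect_supportable_shift (LSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... introduce the shift pattern ...

   so the pattern is only used when some shift form (scalar or vector shift
   amount) is actually available for the target.  */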
5337
5338bool
5339vect_supportable_shift (enum tree_code code, tree scalar_type)
5340{
5341
ef4bddc2 5342 machine_mode vec_mode;
1107f3ae
IR
5343 optab optab;
5344 int icode;
5345 tree vectype;
5346
5347 vectype = get_vectype_for_scalar_type (scalar_type);
5348 if (!vectype)
5349 return false;
5350
5351 optab = optab_for_tree_code (code, vectype, optab_scalar);
5352 if (!optab
5353 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5354 {
5355 optab = optab_for_tree_code (code, vectype, optab_vector);
5356 if (!optab
5357 || (optab_handler (optab, TYPE_MODE (vectype))
5358 == CODE_FOR_nothing))
5359 return false;
5360 }
5361
5362 vec_mode = TYPE_MODE (vectype);
5363 icode = (int) optab_handler (optab, vec_mode);
5364 if (icode == CODE_FOR_nothing)
5365 return false;
5366
5367 return true;
5368}
5369
5370
9dc3f7de
IR
5371/* Function vectorizable_shift.
5372
5373 Check if STMT performs a shift operation that can be vectorized.
5374 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5375 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5376 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
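/* For illustration (an added example, assuming 4-lane int vectors): for
     S1: b_1 = a_2 << n_3;
   a constant or loop-invariant n_3 keeps the shift amount scalar and the
   vector/scalar optab is tried first, while a loop-varying n_3 forces the
   vector/vector form, conceptually
     vb_1 = va_2 << { n3_0, n3_1, n3_2, n3_3 };  */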
5377
5378static bool
355fe088 5379vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5380 gimple **vec_stmt, slp_tree slp_node,
5381 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5382{
5383 tree vec_dest;
5384 tree scalar_dest;
5385 tree op0, op1 = NULL;
5386 tree vec_oprnd1 = NULL_TREE;
5387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5388 tree vectype;
5389 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5390 enum tree_code code;
ef4bddc2 5391 machine_mode vec_mode;
9dc3f7de
IR
5392 tree new_temp;
5393 optab optab;
5394 int icode;
ef4bddc2 5395 machine_mode optab_op2_mode;
355fe088 5396 gimple *def_stmt;
9dc3f7de 5397 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5398 int ndts = 2;
355fe088 5399 gimple *new_stmt = NULL;
9dc3f7de 5400 stmt_vec_info prev_stmt_info;
928686b1
RS
5401 poly_uint64 nunits_in;
5402 poly_uint64 nunits_out;
9dc3f7de 5403 tree vectype_out;
cede2577 5404 tree op1_vectype;
9dc3f7de
IR
5405 int ncopies;
5406 int j, i;
6e1aa848
DN
5407 vec<tree> vec_oprnds0 = vNULL;
5408 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5409 tree vop0, vop1;
5410 unsigned int k;
49eab32e 5411 bool scalar_shift_arg = true;
9dc3f7de 5412 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5413 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5414
5415 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5416 return false;
5417
66c16fd9
RB
5418 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5419 && ! vec_stmt)
9dc3f7de
IR
5420 return false;
5421
5422 /* Is STMT a vectorizable binary/unary operation? */
5423 if (!is_gimple_assign (stmt))
5424 return false;
5425
5426 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5427 return false;
5428
5429 code = gimple_assign_rhs_code (stmt);
5430
5431 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5432 || code == RROTATE_EXPR))
5433 return false;
5434
5435 scalar_dest = gimple_assign_lhs (stmt);
5436 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5437 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5438 {
73fbfcad 5439 if (dump_enabled_p ())
78c60e3d 5440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5441 "bit-precision shifts not supported.\n");
7b7b1813
RG
5442 return false;
5443 }
9dc3f7de
IR
5444
5445 op0 = gimple_assign_rhs1 (stmt);
81c40241 5446 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 5447 {
73fbfcad 5448 if (dump_enabled_p ())
78c60e3d 5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5450 "use not simple.\n");
9dc3f7de
IR
5451 return false;
5452 }
5453 /* If op0 is an external or constant def use a vector type with
5454 the same size as the output vector type. */
5455 if (!vectype)
5456 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5457 if (vec_stmt)
5458 gcc_assert (vectype);
5459 if (!vectype)
5460 {
73fbfcad 5461 if (dump_enabled_p ())
78c60e3d 5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5463 "no vectype for scalar type\n");
9dc3f7de
IR
5464 return false;
5465 }
5466
5467 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5468 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5469 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5470 return false;
5471
5472 op1 = gimple_assign_rhs2 (stmt);
81c40241 5473 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 5474 {
73fbfcad 5475 if (dump_enabled_p ())
78c60e3d 5476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5477 "use not simple.\n");
9dc3f7de
IR
5478 return false;
5479 }
5480
9dc3f7de
IR
5481 /* Multiple types in SLP are handled by creating the appropriate number of
5482 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5483 case of SLP. */
fce57248 5484 if (slp_node)
9dc3f7de
IR
5485 ncopies = 1;
5486 else
e8f142e2 5487 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5488
5489 gcc_assert (ncopies >= 1);
5490
5491 /* Determine whether the shift amount is a vector, or scalar. If the
5492 shift/rotate amount is a vector, use the vector/vector shift optabs. */
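  /* Roughly (an added note): a constant or loop-invariant amount, as in
     a_1 << 3, is treated as a scalar shift argument; an amount defined inside
     the loop, or one that differs between the statements of an SLP node, is
     not, and the vector/vector form must be used instead.  */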
5493
dbfa87aa
YR
5494 if ((dt[1] == vect_internal_def
5495 || dt[1] == vect_induction_def)
5496 && !slp_node)
49eab32e
JJ
5497 scalar_shift_arg = false;
5498 else if (dt[1] == vect_constant_def
5499 || dt[1] == vect_external_def
5500 || dt[1] == vect_internal_def)
5501 {
 5502 	  /* In SLP, we need to check whether the shift count is the same
 5503 	     for all statements; in loops, if it is a constant or invariant,
 5504 	     it is always a scalar shift.  */
5505 if (slp_node)
5506 {
355fe088
TS
5507 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5508 gimple *slpstmt;
49eab32e 5509
9771b263 5510 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5511 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5512 scalar_shift_arg = false;
5513 }
60d393e8
RB
5514
5515 /* If the shift amount is computed by a pattern stmt we cannot
5516 use the scalar amount directly thus give up and use a vector
5517 shift. */
5518 if (dt[1] == vect_internal_def)
5519 {
5520 gimple *def = SSA_NAME_DEF_STMT (op1);
5521 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5522 scalar_shift_arg = false;
5523 }
49eab32e
JJ
5524 }
5525 else
5526 {
73fbfcad 5527 if (dump_enabled_p ())
78c60e3d 5528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5529 "operand mode requires invariant argument.\n");
49eab32e
JJ
5530 return false;
5531 }
5532
9dc3f7de 5533 /* Vector shifted by vector. */
49eab32e 5534 if (!scalar_shift_arg)
9dc3f7de
IR
5535 {
5536 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5537 if (dump_enabled_p ())
78c60e3d 5538 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5539 "vector/vector shift/rotate found.\n");
78c60e3d 5540
aa948027
JJ
5541 if (!op1_vectype)
5542 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5543 if (op1_vectype == NULL_TREE
5544 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5545 {
73fbfcad 5546 if (dump_enabled_p ())
78c60e3d
SS
5547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5548 "unusable type for last operand in"
e645e942 5549 " vector/vector shift/rotate.\n");
cede2577
JJ
5550 return false;
5551 }
9dc3f7de
IR
5552 }
5553 /* See if the machine has a vector shifted by scalar insn and if not
5554 then see if it has a vector shifted by vector insn. */
49eab32e 5555 else
9dc3f7de
IR
5556 {
5557 optab = optab_for_tree_code (code, vectype, optab_scalar);
5558 if (optab
5559 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5560 {
73fbfcad 5561 if (dump_enabled_p ())
78c60e3d 5562 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5563 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5564 }
5565 else
5566 {
5567 optab = optab_for_tree_code (code, vectype, optab_vector);
5568 if (optab
5569 && (optab_handler (optab, TYPE_MODE (vectype))
5570 != CODE_FOR_nothing))
5571 {
49eab32e
JJ
5572 scalar_shift_arg = false;
5573
73fbfcad 5574 if (dump_enabled_p ())
78c60e3d 5575 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5576 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5577
5578 /* Unlike the other binary operators, shifts/rotates have
5579 the rhs being int, instead of the same type as the lhs,
5580 so make sure the scalar is the right type if we are
aa948027 5581 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5582 if (dt[1] == vect_constant_def)
5583 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5584 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5585 TREE_TYPE (op1)))
5586 {
5587 if (slp_node
5588 && TYPE_MODE (TREE_TYPE (vectype))
5589 != TYPE_MODE (TREE_TYPE (op1)))
5590 {
73fbfcad 5591 if (dump_enabled_p ())
78c60e3d
SS
5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5593 "unusable type for last operand in"
e645e942 5594 " vector/vector shift/rotate.\n");
21c0a521 5595 return false;
aa948027
JJ
5596 }
5597 if (vec_stmt && !slp_node)
5598 {
5599 op1 = fold_convert (TREE_TYPE (vectype), op1);
5600 op1 = vect_init_vector (stmt, op1,
5601 TREE_TYPE (vectype), NULL);
5602 }
5603 }
9dc3f7de
IR
5604 }
5605 }
5606 }
9dc3f7de
IR
5607
5608 /* Supportable by target? */
5609 if (!optab)
5610 {
73fbfcad 5611 if (dump_enabled_p ())
78c60e3d 5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5613 "no optab.\n");
9dc3f7de
IR
5614 return false;
5615 }
5616 vec_mode = TYPE_MODE (vectype);
5617 icode = (int) optab_handler (optab, vec_mode);
5618 if (icode == CODE_FOR_nothing)
5619 {
73fbfcad 5620 if (dump_enabled_p ())
78c60e3d 5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5622 "op not supported by target.\n");
9dc3f7de 5623 /* Check only during analysis. */
cf098191 5624 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5625 || (!vec_stmt
5626 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5627 return false;
73fbfcad 5628 if (dump_enabled_p ())
e645e942
TJ
5629 dump_printf_loc (MSG_NOTE, vect_location,
5630 "proceeding using word mode.\n");
9dc3f7de
IR
5631 }
5632
5633 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5634 if (!vec_stmt
5635 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5636 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5637 {
73fbfcad 5638 if (dump_enabled_p ())
78c60e3d 5639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5640 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5641 return false;
5642 }
5643
5644 if (!vec_stmt) /* transformation not required. */
5645 {
5646 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5647 if (dump_enabled_p ())
e645e942
TJ
5648 dump_printf_loc (MSG_NOTE, vect_location,
5649 "=== vectorizable_shift ===\n");
68435eb2 5650 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5651 return true;
5652 }
5653
67b8dbac 5654 /* Transform. */
9dc3f7de 5655
73fbfcad 5656 if (dump_enabled_p ())
78c60e3d 5657 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5658 "transform binary/unary operation.\n");
9dc3f7de
IR
5659
5660 /* Handle def. */
5661 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5662
9dc3f7de
IR
5663 prev_stmt_info = NULL;
5664 for (j = 0; j < ncopies; j++)
5665 {
5666 /* Handle uses. */
5667 if (j == 0)
5668 {
5669 if (scalar_shift_arg)
5670 {
5671 /* Vector shl and shr insn patterns can be defined with scalar
5672 operand 2 (shift operand). In this case, use constant or loop
5673 invariant op1 directly, without extending it to vector mode
5674 first. */
5675 optab_op2_mode = insn_data[icode].operand[2].mode;
5676 if (!VECTOR_MODE_P (optab_op2_mode))
5677 {
73fbfcad 5678 if (dump_enabled_p ())
78c60e3d 5679 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5680 "operand 1 using scalar mode.\n");
9dc3f7de 5681 vec_oprnd1 = op1;
8930f723 5682 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5683 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5684 if (slp_node)
5685 {
5686 /* Store vec_oprnd1 for every vector stmt to be created
5687 for SLP_NODE. We check during the analysis that all
5688 the shift arguments are the same.
5689 TODO: Allow different constants for different vector
5690 stmts generated for an SLP instance. */
5691 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5692 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5693 }
5694 }
5695 }
5696
5697 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5698 (a special case for certain kind of vector shifts); otherwise,
5699 operand 1 should be of a vector type (the usual case). */
5700 if (vec_oprnd1)
5701 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5702 slp_node);
9dc3f7de
IR
5703 else
5704 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5705 slp_node);
9dc3f7de
IR
5706 }
5707 else
5708 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5709
5710 /* Arguments are ready. Create the new vector stmt. */
9771b263 5711 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5712 {
9771b263 5713 vop1 = vec_oprnds1[i];
0d0e4a03 5714 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5715 new_temp = make_ssa_name (vec_dest, new_stmt);
5716 gimple_assign_set_lhs (new_stmt, new_temp);
5717 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5718 if (slp_node)
9771b263 5719 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5720 }
5721
5722 if (slp_node)
5723 continue;
5724
5725 if (j == 0)
5726 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5727 else
5728 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5729 prev_stmt_info = vinfo_for_stmt (new_stmt);
5730 }
5731
9771b263
DN
5732 vec_oprnds0.release ();
5733 vec_oprnds1.release ();
9dc3f7de
IR
5734
5735 return true;
5736}
5737
5738
ebfd146a
IR
5739/* Function vectorizable_operation.
5740
16949072
RG
5741 Check if STMT performs a binary, unary or ternary operation that can
5742 be vectorized.
b8698a0f 5743 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5744 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5745 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
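/* For illustration (an added sketch): a binary statement such as
     S1: z_1 = x_2 + y_3;
   becomes, for each of the ncopies copies,
     VS1: vz_1 = vx_2 + vy_3;
   and ternary rhs codes are handled the same way with a third vectorized
   operand.  */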
5746
5747static bool
355fe088 5748vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5749 gimple **vec_stmt, slp_tree slp_node,
5750 stmt_vector_for_cost *cost_vec)
ebfd146a 5751{
00f07b86 5752 tree vec_dest;
ebfd146a 5753 tree scalar_dest;
16949072 5754 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5756 tree vectype;
ebfd146a 5757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5758 enum tree_code code, orig_code;
ef4bddc2 5759 machine_mode vec_mode;
ebfd146a
IR
5760 tree new_temp;
5761 int op_type;
00f07b86 5762 optab optab;
523ba738 5763 bool target_support_p;
355fe088 5764 gimple *def_stmt;
16949072
RG
5765 enum vect_def_type dt[3]
5766 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5767 int ndts = 3;
355fe088 5768 gimple *new_stmt = NULL;
ebfd146a 5769 stmt_vec_info prev_stmt_info;
928686b1
RS
5770 poly_uint64 nunits_in;
5771 poly_uint64 nunits_out;
ebfd146a
IR
5772 tree vectype_out;
5773 int ncopies;
5774 int j, i;
6e1aa848
DN
5775 vec<tree> vec_oprnds0 = vNULL;
5776 vec<tree> vec_oprnds1 = vNULL;
5777 vec<tree> vec_oprnds2 = vNULL;
16949072 5778 tree vop0, vop1, vop2;
a70d6342 5779 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5780 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5781
a70d6342 5782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5783 return false;
5784
66c16fd9
RB
5785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5786 && ! vec_stmt)
ebfd146a
IR
5787 return false;
5788
5789 /* Is STMT a vectorizable binary/unary operation? */
5790 if (!is_gimple_assign (stmt))
5791 return false;
5792
5793 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5794 return false;
5795
0eb952ea 5796 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5797
1af4ebf5
MG
5798 /* For pointer addition and subtraction, we should use the normal
5799 plus and minus for the vector operation. */
ebfd146a
IR
5800 if (code == POINTER_PLUS_EXPR)
5801 code = PLUS_EXPR;
1af4ebf5
MG
5802 if (code == POINTER_DIFF_EXPR)
5803 code = MINUS_EXPR;
ebfd146a
IR
5804
5805 /* Support only unary or binary operations. */
5806 op_type = TREE_CODE_LENGTH (code);
16949072 5807 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5808 {
73fbfcad 5809 if (dump_enabled_p ())
78c60e3d 5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5811 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5812 op_type);
ebfd146a
IR
5813 return false;
5814 }
5815
b690cc0f
RG
5816 scalar_dest = gimple_assign_lhs (stmt);
5817 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5818
7b7b1813
RG
5819 /* Most operations cannot handle bit-precision types without extra
5820 truncations. */
045c1278 5821 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5822 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5823 /* Exception are bitwise binary operations. */
5824 && code != BIT_IOR_EXPR
5825 && code != BIT_XOR_EXPR
5826 && code != BIT_AND_EXPR)
5827 {
73fbfcad 5828 if (dump_enabled_p ())
78c60e3d 5829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5830 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5831 return false;
5832 }
5833
ebfd146a 5834 op0 = gimple_assign_rhs1 (stmt);
81c40241 5835 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5836 {
73fbfcad 5837 if (dump_enabled_p ())
78c60e3d 5838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5839 "use not simple.\n");
ebfd146a
IR
5840 return false;
5841 }
b690cc0f
RG
5842 /* If op0 is an external or constant def use a vector type with
5843 the same size as the output vector type. */
5844 if (!vectype)
b036c6c5
IE
5845 {
5846 /* For boolean type we cannot determine vectype by
5847 invariant value (don't know whether it is a vector
5848 of booleans or vector of integers). We use output
 5849 	 vectype because operations on booleans don't change
 5850 	 the type.  */
2568d8a1 5851 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5852 {
2568d8a1 5853 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5854 {
5855 if (dump_enabled_p ())
5856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5857 "not supported operation on bool value.\n");
5858 return false;
5859 }
5860 vectype = vectype_out;
5861 }
5862 else
5863 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5864 }
7d8930a0
IR
5865 if (vec_stmt)
5866 gcc_assert (vectype);
5867 if (!vectype)
5868 {
73fbfcad 5869 if (dump_enabled_p ())
7d8930a0 5870 {
78c60e3d
SS
5871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5872 "no vectype for scalar type ");
5873 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5874 TREE_TYPE (op0));
e645e942 5875 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5876 }
5877
5878 return false;
5879 }
b690cc0f
RG
5880
5881 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5882 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5883 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5884 return false;
ebfd146a 5885
16949072 5886 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5887 {
5888 op1 = gimple_assign_rhs2 (stmt);
81c40241 5889 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5890 {
73fbfcad 5891 if (dump_enabled_p ())
78c60e3d 5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5893 "use not simple.\n");
ebfd146a
IR
5894 return false;
5895 }
5896 }
16949072
RG
5897 if (op_type == ternary_op)
5898 {
5899 op2 = gimple_assign_rhs3 (stmt);
81c40241 5900 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5901 {
73fbfcad 5902 if (dump_enabled_p ())
78c60e3d 5903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5904 "use not simple.\n");
16949072
RG
5905 return false;
5906 }
5907 }
ebfd146a 5908
b690cc0f 5909 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5910 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5911 case of SLP. */
fce57248 5912 if (slp_node)
b690cc0f
RG
5913 ncopies = 1;
5914 else
e8f142e2 5915 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5916
5917 gcc_assert (ncopies >= 1);
5918
9dc3f7de 5919 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5920 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5921 || code == RROTATE_EXPR)
9dc3f7de 5922 return false;
ebfd146a 5923
ebfd146a 5924 /* Supportable by target? */
00f07b86
RH
5925
5926 vec_mode = TYPE_MODE (vectype);
5927 if (code == MULT_HIGHPART_EXPR)
523ba738 5928 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5929 else
5930 {
5931 optab = optab_for_tree_code (code, vectype, optab_default);
5932 if (!optab)
5deb57cb 5933 {
73fbfcad 5934 if (dump_enabled_p ())
78c60e3d 5935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5936 "no optab.\n");
00f07b86 5937 return false;
5deb57cb 5938 }
523ba738
RS
5939 target_support_p = (optab_handler (optab, vec_mode)
5940 != CODE_FOR_nothing);
5deb57cb
JJ
5941 }
5942
523ba738 5943 if (!target_support_p)
ebfd146a 5944 {
73fbfcad 5945 if (dump_enabled_p ())
78c60e3d 5946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5947 "op not supported by target.\n");
ebfd146a 5948 /* Check only during analysis. */
cf098191 5949 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5950 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5951 return false;
73fbfcad 5952 if (dump_enabled_p ())
e645e942
TJ
5953 dump_printf_loc (MSG_NOTE, vect_location,
5954 "proceeding using word mode.\n");
383d9c83
IR
5955 }
5956
4a00c761 5957 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5958 if (!VECTOR_MODE_P (vec_mode)
5959 && !vec_stmt
ca09abcb 5960 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5961 {
73fbfcad 5962 if (dump_enabled_p ())
78c60e3d 5963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5964 "not worthwhile without SIMD support.\n");
e34842c6 5965 return false;
7d8930a0 5966 }
ebfd146a 5967
ebfd146a
IR
5968 if (!vec_stmt) /* transformation not required. */
5969 {
4a00c761 5970 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5971 if (dump_enabled_p ())
78c60e3d 5972 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5973 "=== vectorizable_operation ===\n");
68435eb2 5974 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5975 return true;
5976 }
5977
67b8dbac 5978 /* Transform. */
ebfd146a 5979
73fbfcad 5980 if (dump_enabled_p ())
78c60e3d 5981 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5982 "transform binary/unary operation.\n");
383d9c83 5983
ebfd146a 5984 /* Handle def. */
00f07b86 5985 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5986
0eb952ea
JJ
5987 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5988 vectors with unsigned elements, but the result is signed. So, we
5989 need to compute the MINUS_EXPR into vectype temporary and
5990 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5991 tree vec_cvt_dest = NULL_TREE;
5992 if (orig_code == POINTER_DIFF_EXPR)
5993 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5994
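  /* An added example of that case: for a pointer difference
       d_1 = p_2 - q_3;
     the subtraction is done on the unsigned element vectype and the result
     is then VIEW_CONVERT_EXPRed into the signed vectype_out via
     vec_cvt_dest.  */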
ebfd146a
IR
5995 /* In case the vectorization factor (VF) is bigger than the number
5996 of elements that we can fit in a vectype (nunits), we have to generate
 5997     more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
5998 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5999 from one copy of the vector stmt to the next, in the field
6000 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6001 stages to find the correct vector defs to be used when vectorizing
6002 stmts that use the defs of the current stmt. The example below
6003 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6004 we need to create 4 vectorized stmts):
6005
6006 before vectorization:
6007 RELATED_STMT VEC_STMT
6008 S1: x = memref - -
6009 S2: z = x + 1 - -
6010
6011 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6012 there):
6013 RELATED_STMT VEC_STMT
6014 VS1_0: vx0 = memref0 VS1_1 -
6015 VS1_1: vx1 = memref1 VS1_2 -
6016 VS1_2: vx2 = memref2 VS1_3 -
6017 VS1_3: vx3 = memref3 - -
6018 S1: x = load - VS1_0
6019 S2: z = x + 1 - -
6020
6021 step2: vectorize stmt S2 (done here):
6022 To vectorize stmt S2 we first need to find the relevant vector
6023 def for the first operand 'x'. This is, as usual, obtained from
6024 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6025 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6026 relevant vector def 'vx0'. Having found 'vx0' we can generate
6027 the vector stmt VS2_0, and as usual, record it in the
6028 STMT_VINFO_VEC_STMT of stmt S2.
6029 When creating the second copy (VS2_1), we obtain the relevant vector
6030 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6031 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6032 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6033 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6034 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6035 chain of stmts and pointers:
6036 RELATED_STMT VEC_STMT
6037 VS1_0: vx0 = memref0 VS1_1 -
6038 VS1_1: vx1 = memref1 VS1_2 -
6039 VS1_2: vx2 = memref2 VS1_3 -
6040 VS1_3: vx3 = memref3 - -
6041 S1: x = load - VS1_0
6042 VS2_0: vz0 = vx0 + v1 VS2_1 -
6043 VS2_1: vz1 = vx1 + v1 VS2_2 -
6044 VS2_2: vz2 = vx2 + v1 VS2_3 -
6045 VS2_3: vz3 = vx3 + v1 - -
6046 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6047
6048 prev_stmt_info = NULL;
6049 for (j = 0; j < ncopies; j++)
6050 {
6051 /* Handle uses. */
6052 if (j == 0)
4a00c761 6053 {
d6476f90 6054 if (op_type == binary_op)
4a00c761 6055 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6056 slp_node);
d6476f90
RB
6057 else if (op_type == ternary_op)
6058 {
6059 if (slp_node)
6060 {
6061 auto_vec<tree> ops(3);
6062 ops.quick_push (op0);
6063 ops.quick_push (op1);
6064 ops.quick_push (op2);
6065 auto_vec<vec<tree> > vec_defs(3);
6066 vect_get_slp_defs (ops, slp_node, &vec_defs);
6067 vec_oprnds0 = vec_defs[0];
6068 vec_oprnds1 = vec_defs[1];
6069 vec_oprnds2 = vec_defs[2];
6070 }
6071 else
6072 {
6073 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6074 NULL);
6075 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6076 NULL);
6077 }
6078 }
4a00c761
JJ
6079 else
6080 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6081 slp_node);
4a00c761 6082 }
ebfd146a 6083 else
4a00c761
JJ
6084 {
6085 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6086 if (op_type == ternary_op)
6087 {
9771b263
DN
6088 tree vec_oprnd = vec_oprnds2.pop ();
6089 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6090 vec_oprnd));
4a00c761
JJ
6091 }
6092 }
6093
6094 /* Arguments are ready. Create the new vector stmt. */
9771b263 6095 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6096 {
4a00c761 6097 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6098 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6099 vop2 = ((op_type == ternary_op)
9771b263 6100 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 6101 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
6102 new_temp = make_ssa_name (vec_dest, new_stmt);
6103 gimple_assign_set_lhs (new_stmt, new_temp);
6104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6105 if (vec_cvt_dest)
6106 {
6107 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6108 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6109 new_temp);
6110 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6111 gimple_assign_set_lhs (new_stmt, new_temp);
6112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6113 }
4a00c761 6114 if (slp_node)
9771b263 6115 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
6116 }
6117
4a00c761
JJ
6118 if (slp_node)
6119 continue;
6120
6121 if (j == 0)
6122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6123 else
6124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6125 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
6126 }
6127
9771b263
DN
6128 vec_oprnds0.release ();
6129 vec_oprnds1.release ();
6130 vec_oprnds2.release ();
ebfd146a 6131
ebfd146a
IR
6132 return true;
6133}
6134
f702e7d4 6135/* A helper function to ensure data reference DR's base alignment. */
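/* For example (an added illustration): if the base object of DR is a
   file-scope array whose declared alignment is smaller than the target's
   preferred vector alignment, the declaration's alignment is raised here
   (and DECL_USER_ALIGN set) so that aligned vector accesses can be used.  */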
c716e67f
XDL
6136
6137static void
f702e7d4 6138ensure_base_align (struct data_reference *dr)
c716e67f
XDL
6139{
6140 if (!dr->aux)
6141 return;
6142
52639a61 6143 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6144 {
52639a61 6145 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6146
f702e7d4
RS
6147 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6148
428f0c67 6149 if (decl_in_symtab_p (base_decl))
f702e7d4 6150 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6151 else
6152 {
f702e7d4 6153 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6154 DECL_USER_ALIGN (base_decl) = 1;
6155 }
52639a61 6156 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6157 }
6158}
6159
ebfd146a 6160
44fc7854
BE
6161/* Function get_group_alias_ptr_type.
6162
6163 Return the alias type for the group starting at FIRST_STMT. */
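/* An added illustrative note: when every member of the interleaving group
   references memory with the same alias set, the alias pointer type of the
   first reference is returned; if the members disagree (e.g. stores to
   fields of different types), ptr_type_node is returned, which
   conservatively aliases everything.  */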
6164
6165static tree
6166get_group_alias_ptr_type (gimple *first_stmt)
6167{
6168 struct data_reference *first_dr, *next_dr;
6169 gimple *next_stmt;
6170
6171 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6172 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
44fc7854
BE
6173 while (next_stmt)
6174 {
6175 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6176 if (get_alias_set (DR_REF (first_dr))
6177 != get_alias_set (DR_REF (next_dr)))
6178 {
6179 if (dump_enabled_p ())
6180 dump_printf_loc (MSG_NOTE, vect_location,
6181 "conflicting alias set types.\n");
6182 return ptr_type_node;
6183 }
2c53b149 6184 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
44fc7854
BE
6185 }
6186 return reference_alias_ptr_type (DR_REF (first_dr));
6187}
6188
6189
ebfd146a
IR
6190/* Function vectorizable_store.
6191
b8698a0f
L
 6192    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6193 can be vectorized.
6194 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6195 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6196 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
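/* For illustration (an added sketch): the store may be a plain assignment
   such as
     a[i_1] = x_2;
   or an internal-function store such as a masked or scatter store, and it
   may belong to an interleaving group of neighbouring stores that is
   vectorized as a whole.  */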
6197
6198static bool
355fe088 6199vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 6200 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 6201{
ebfd146a
IR
6202 tree data_ref;
6203 tree op;
6204 tree vec_oprnd = NULL_TREE;
6205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6206 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6207 tree elem_type;
ebfd146a 6208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6209 struct loop *loop = NULL;
ef4bddc2 6210 machine_mode vec_mode;
ebfd146a
IR
6211 tree dummy;
6212 enum dr_alignment_support alignment_support_scheme;
355fe088 6213 gimple *def_stmt;
929b4411
RS
6214 enum vect_def_type rhs_dt = vect_unknown_def_type;
6215 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6216 stmt_vec_info prev_stmt_info = NULL;
6217 tree dataref_ptr = NULL_TREE;
74bf76ed 6218 tree dataref_offset = NULL_TREE;
355fe088 6219 gimple *ptr_incr = NULL;
ebfd146a
IR
6220 int ncopies;
6221 int j;
2de001ee
RS
6222 gimple *next_stmt, *first_stmt;
6223 bool grouped_store;
ebfd146a 6224 unsigned int group_size, i;
6e1aa848
DN
6225 vec<tree> oprnds = vNULL;
6226 vec<tree> result_chain = vNULL;
ebfd146a 6227 bool inv_p;
09dfa495 6228 tree offset = NULL_TREE;
6e1aa848 6229 vec<tree> vec_oprnds = vNULL;
ebfd146a 6230 bool slp = (slp_node != NULL);
ebfd146a 6231 unsigned int vec_num;
a70d6342 6232 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6233 vec_info *vinfo = stmt_info->vinfo;
272c6793 6234 tree aggr_type;
134c85ca 6235 gather_scatter_info gs_info;
355fe088 6236 gimple *new_stmt;
d9f21f6a 6237 poly_uint64 vf;
2de001ee 6238 vec_load_store_type vls_type;
44fc7854 6239 tree ref_type;
a70d6342 6240
a70d6342 6241 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6242 return false;
6243
66c16fd9
RB
6244 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6245 && ! vec_stmt)
ebfd146a
IR
6246 return false;
6247
6248 /* Is vectorizable store? */
6249
c3a8f964
RS
6250 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6251 if (is_gimple_assign (stmt))
6252 {
6253 tree scalar_dest = gimple_assign_lhs (stmt);
6254 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6255 && is_pattern_stmt_p (stmt_info))
6256 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6257 if (TREE_CODE (scalar_dest) != ARRAY_REF
6258 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6259 && TREE_CODE (scalar_dest) != INDIRECT_REF
6260 && TREE_CODE (scalar_dest) != COMPONENT_REF
6261 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6262 && TREE_CODE (scalar_dest) != REALPART_EXPR
6263 && TREE_CODE (scalar_dest) != MEM_REF)
6264 return false;
6265 }
6266 else
6267 {
6268 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6269 if (!call || !gimple_call_internal_p (call))
6270 return false;
6271
6272 internal_fn ifn = gimple_call_internal_fn (call);
6273 if (!internal_store_fn_p (ifn))
c3a8f964 6274 return false;
ebfd146a 6275
6276 if (slp_node != NULL)
6277 {
6278 if (dump_enabled_p ())
6279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6280 "SLP of masked stores not supported.\n");
6281 return false;
6282 }
6283
6284 int mask_index = internal_fn_mask_index (ifn);
6285 if (mask_index >= 0)
6286 {
6287 mask = gimple_call_arg (call, mask_index);
6288 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6289 &mask_vectype))
6290 return false;
6291 }
6292 }
6293
6294 op = vect_get_store_rhs (stmt);
ebfd146a 6295
6296 /* Cannot have hybrid store SLP -- that would mean storing to the
6297 same location twice. */
6298 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6299
f4d09712 6300 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6301 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6302
6303 if (loop_vinfo)
6304 {
6305 loop = LOOP_VINFO_LOOP (loop_vinfo);
6306 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6307 }
6308 else
6309 vf = 1;
6310
6311 /* Multiple types in SLP are handled by creating the appropriate number of
6312 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6313 case of SLP. */
fce57248 6314 if (slp)
6315 ncopies = 1;
6316 else
e8f142e2 6317 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6318
6319 gcc_assert (ncopies >= 1);
6320
6321 /* FORNOW. This restriction should be relaxed. */
6322 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6323 {
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6326 "multiple types in nested loop.\n");
6327 return false;
6328 }
6329
929b4411 6330 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6331 return false;
6332
272c6793 6333 elem_type = TREE_TYPE (vectype);
ebfd146a 6334 vec_mode = TYPE_MODE (vectype);
7b7b1813 6335
6336 if (!STMT_VINFO_DATA_REF (stmt_info))
6337 return false;
6338
2de001ee 6339 vect_memory_access_type memory_access_type;
7e11fc7f 6340 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6341 &memory_access_type, &gs_info))
6342 return false;
3bab6342 6343
6344 if (mask)
6345 {
6346 if (memory_access_type == VMAT_CONTIGUOUS)
6347 {
6348 if (!VECTOR_MODE_P (vec_mode)
6349 || !can_vec_mask_load_store_p (vec_mode,
6350 TYPE_MODE (mask_vectype), false))
6351 return false;
6352 }
6353 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6354 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6355 {
6356 if (dump_enabled_p ())
6357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6358 "unsupported access type for masked store.\n");
6359 return false;
6360 }
6361 }
6362 else
6363 {
6364 /* FORNOW. In some cases can vectorize even if data-type not supported
6365 (e.g. - array initialization with 0). */
6366 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6367 return false;
6368 }
6369
f307441a 6370 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6371 && memory_access_type != VMAT_GATHER_SCATTER
6372 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6373 if (grouped_store)
6374 {
2c53b149 6375 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7cfb4d93 6376 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6377 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
6378 }
6379 else
6380 {
6381 first_stmt = stmt;
6382 first_dr = dr;
6383 group_size = vec_num = 1;
6384 }
6385
6386 if (!vec_stmt) /* transformation not required. */
6387 {
2de001ee 6388 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6389
6390 if (loop_vinfo
6391 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6392 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6393 memory_access_type, &gs_info);
7cfb4d93 6394
ebfd146a 6395 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6396 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6397 vls_type, slp_node, cost_vec);
6398 return true;
6399 }
2de001ee 6400 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6401
67b8dbac 6402 /* Transform. */
ebfd146a 6403
f702e7d4 6404 ensure_base_align (dr);
c716e67f 6405
f307441a 6406 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6407 {
c3a8f964 6408 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6409 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6410 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6411 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6412 edge pe = loop_preheader_edge (loop);
6413 gimple_seq seq;
6414 basic_block new_bb;
6415 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6416 poly_uint64 scatter_off_nunits
6417 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6418
4d694b27 6419 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6420 modifier = NONE;
4d694b27 6421 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6422 {
3bab6342
AT
6423 modifier = WIDEN;
6424
4d694b27
RS
6425 /* Currently gathers and scatters are only supported for
6426 fixed-length vectors. */
6427 unsigned int count = scatter_off_nunits.to_constant ();
6428 vec_perm_builder sel (count, count, 1);
6429 for (i = 0; i < (unsigned int) count; ++i)
6430 sel.quick_push (i | (count / 2));
3bab6342 6431
4d694b27 6432 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6433 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6434 indices);
3bab6342
AT
6435 gcc_assert (perm_mask != NULL_TREE);
6436 }
4d694b27 6437 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6438 {
3bab6342
AT
6439 modifier = NARROW;
6440
4d694b27
RS
6441 /* Currently gathers and scatters are only supported for
6442 fixed-length vectors. */
6443 unsigned int count = nunits.to_constant ();
6444 vec_perm_builder sel (count, count, 1);
6445 for (i = 0; i < (unsigned int) count; ++i)
6446 sel.quick_push (i | (count / 2));
3bab6342 6447
4d694b27 6448 vec_perm_indices indices (sel, 2, count);
e3342de4 6449 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6450 gcc_assert (perm_mask != NULL_TREE);
6451 ncopies *= 2;
6452 }
6453 else
6454 gcc_unreachable ();
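      /* As an illustrative example (assuming a 4-element data vectype and an
	 8-element offset vectype, not a case spelled out in the original
	 comments): modifier == WIDEN, count == 8, and the mask built above is
	 {4, 5, 6, 7, 4, 5, 6, 7}, so the permutation applied on odd copies
	 replicates the upper half of the offset vector into the positions the
	 scatter reads.  The NARROW case builds the analogous mask for the rhs
	 vector and doubles NCOPIES instead.  */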
6455
134c85ca 6456 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6457 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6458 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6459 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6460 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6461 scaletype = TREE_VALUE (arglist);
6462
6463 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6464 && TREE_CODE (rettype) == VOID_TYPE);
6465
134c85ca 6466 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6467 if (!is_gimple_min_invariant (ptr))
6468 {
6469 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6470 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6471 gcc_assert (!new_bb);
6472 }
6473
6474 /* Currently we support only unconditional scatter stores,
6475 so mask should be all ones. */
6476 mask = build_int_cst (masktype, -1);
6477 mask = vect_init_vector (stmt, mask, masktype, NULL);
6478
134c85ca 6479 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6480
6481 prev_stmt_info = NULL;
6482 for (j = 0; j < ncopies; ++j)
6483 {
6484 if (j == 0)
6485 {
6486 src = vec_oprnd1
c3a8f964 6487 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6488 op = vec_oprnd0
134c85ca 6489 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6490 }
6491 else if (modifier != NONE && (j & 1))
6492 {
6493 if (modifier == WIDEN)
6494 {
6495 src = vec_oprnd1
929b4411 6496 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6497 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6498 stmt, gsi);
6499 }
6500 else if (modifier == NARROW)
6501 {
6502 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6503 stmt, gsi);
6504 op = vec_oprnd0
134c85ca
RS
6505 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6506 vec_oprnd0);
3bab6342
AT
6507 }
6508 else
6509 gcc_unreachable ();
6510 }
6511 else
6512 {
6513 src = vec_oprnd1
929b4411 6514 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6515 op = vec_oprnd0
134c85ca
RS
6516 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6517 vec_oprnd0);
3bab6342
AT
6518 }
6519
6520 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6521 {
928686b1
RS
6522 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6523 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6524 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6525 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6526 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6528 src = var;
6529 }
6530
6531 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6532 {
928686b1
RS
6533 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6534 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6535 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6536 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6537 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6538 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6539 op = var;
6540 }
6541
6542 new_stmt
134c85ca 6543 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6544
6545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6546
6547 if (prev_stmt_info == NULL)
6548 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6549 else
6550 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6551 prev_stmt_info = vinfo_for_stmt (new_stmt);
6552 }
6553 return true;
6554 }
6555
f307441a 6556 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6557 {
2c53b149
RB
6558 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6559 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
f307441a 6560 }
ebfd146a 6561
f307441a
RS
6562 if (grouped_store)
6563 {
ebfd146a 6564 /* FORNOW */
a70d6342 6565 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6566
6567 /* We vectorize all the stmts of the interleaving group when we
6568 reach the last stmt in the group. */
2c53b149
RB
6569 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6570 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6571 && !slp)
6572 {
6573 *vec_stmt = NULL;
6574 return true;
6575 }
6576
6577 if (slp)
4b5caab7 6578 {
0d0293ac 6579 grouped_store = false;
4b5caab7
IR
6580 /* VEC_NUM is the number of vect stmts to be created for this
6581 group. */
6582 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6583 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2c53b149 6584 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6585 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6586 op = vect_get_store_rhs (first_stmt);
4b5caab7 6587 }
ebfd146a 6588 else
4b5caab7
IR
6589 /* VEC_NUM is the number of vect stmts to be created for this
6590 group. */
ebfd146a 6591 vec_num = group_size;
44fc7854
BE
6592
6593 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6594 }
b8698a0f 6595 else
7cfb4d93 6596 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6597
73fbfcad 6598 if (dump_enabled_p ())
78c60e3d 6599 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6600 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6601
2de001ee
RS
6602 if (memory_access_type == VMAT_ELEMENTWISE
6603 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6604 {
6605 gimple_stmt_iterator incr_gsi;
6606 bool insert_after;
355fe088 6607 gimple *incr;
f2e2a985
MM
6608 tree offvar;
6609 tree ivstep;
6610 tree running_off;
f2e2a985
MM
6611 tree stride_base, stride_step, alias_off;
6612 tree vec_oprnd;
f502d50e 6613 unsigned int g;
4d694b27
RS
6614 /* Checked by get_load_store_type. */
6615 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6616
7cfb4d93 6617 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6618 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6619
6620 stride_base
6621 = fold_build_pointer_plus
b210f45f 6622 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6623 size_binop (PLUS_EXPR,
b210f45f 6624 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6625 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6626 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6627
6628 /* For a store with loop-invariant (but other than power-of-2)
6629 stride (i.e. not a grouped access) like so:
6630
6631 for (i = 0; i < n; i += stride)
6632 array[i] = ...;
6633
6634 we generate a new induction variable and new stores from
6635 the components of the (vectorized) rhs:
6636
6637 for (j = 0; ; j += VF*stride)
6638 vectemp = ...;
6639 tmp1 = vectemp[0];
6640 array[j] = tmp1;
6641 tmp2 = vectemp[1];
6642 array[j + stride] = tmp2;
6643 ...
6644 */
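      /* A concrete illustration of the transformation above (assuming a
	 4-element vectype and no SLP group): NSTORES is 4, so each copy
	 extracts elements 0..3 of the vectorized rhs with BIT_FIELD_REFs and
	 stores them to array[j], array[j + stride], array[j + 2*stride] and
	 array[j + 3*stride], after which the induction variable advances by
	 VF * stride.  */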
6645
4d694b27 6646 unsigned nstores = const_nunits;
b17dc4d4 6647 unsigned lnel = 1;
cee62fee 6648 tree ltype = elem_type;
04199738 6649 tree lvectype = vectype;
cee62fee
MM
6650 if (slp)
6651 {
4d694b27
RS
6652 if (group_size < const_nunits
6653 && const_nunits % group_size == 0)
b17dc4d4 6654 {
4d694b27 6655 nstores = const_nunits / group_size;
b17dc4d4
RB
6656 lnel = group_size;
6657 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6658 lvectype = vectype;
6659
6660 /* First check if vec_extract optab doesn't support extraction
6661 of vector elts directly. */
b397965c 6662 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6663 machine_mode vmode;
6664 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6665 || !VECTOR_MODE_P (vmode)
414fef4e 6666 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6667 || (convert_optab_handler (vec_extract_optab,
6668 TYPE_MODE (vectype), vmode)
6669 == CODE_FOR_nothing))
6670 {
6671 /* Try to avoid emitting an extract of vector elements
6672 by performing the extracts using an integer type of the
6673 same size, extracting from a vector of those and then
6674 re-interpreting it as the original vector type if
6675 supported. */
6676 unsigned lsize
6677 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6678 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6679 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6680 /* If we can't construct such a vector fall back to
6681 element extracts from the original vector type and
6682 element size stores. */
4d694b27 6683 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6684 && VECTOR_MODE_P (vmode)
414fef4e 6685 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6686 && (convert_optab_handler (vec_extract_optab,
6687 vmode, elmode)
6688 != CODE_FOR_nothing))
6689 {
4d694b27 6690 nstores = lnunits;
04199738
RB
6691 lnel = group_size;
6692 ltype = build_nonstandard_integer_type (lsize, 1);
6693 lvectype = build_vector_type (ltype, nstores);
6694 }
6695 /* Else fall back to vector extraction anyway.
6696 Fewer stores are more important than avoiding spilling
6697 of the vector we extract from. Compared to the
6698 construction case in vectorizable_load no store-forwarding
6699 issue exists here for reasonable archs. */
6700 }
b17dc4d4 6701 }
4d694b27
RS
6702 else if (group_size >= const_nunits
6703 && group_size % const_nunits == 0)
b17dc4d4
RB
6704 {
6705 nstores = 1;
4d694b27 6706 lnel = const_nunits;
b17dc4d4 6707 ltype = vectype;
04199738 6708 lvectype = vectype;
b17dc4d4 6709 }
cee62fee
MM
6710 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6711 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6712 }
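      /* As an illustration of the integer-type fallback above (assuming a
	 V8HI vectype and a group size of 2): if extracting V2HI pieces from
	 a V8HI is not supported, LSIZE is 32, so the rhs is viewed as V4SI
	 and four SImode values are extracted and stored per copy, giving
	 NSTORES == 4, LNEL == 2 and LTYPE == a 32-bit integer type.  */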
6713
f2e2a985
MM
6714 ivstep = stride_step;
6715 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6716 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6717
6718 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6719
b210f45f
RB
6720 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6721 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6722 create_iv (stride_base, ivstep, NULL,
6723 loop, &incr_gsi, insert_after,
6724 &offvar, NULL);
6725 incr = gsi_stmt (incr_gsi);
310213d4 6726 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985 6727
b210f45f 6728 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6729
6730 prev_stmt_info = NULL;
44fc7854 6731 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6732 next_stmt = first_stmt;
6733 for (g = 0; g < group_size; g++)
f2e2a985 6734 {
f502d50e
MM
6735 running_off = offvar;
6736 if (g)
f2e2a985 6737 {
f502d50e
MM
6738 tree size = TYPE_SIZE_UNIT (ltype);
6739 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6740 size);
f502d50e 6741 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6742 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6743 running_off, pos);
f2e2a985 6744 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6745 running_off = newoff;
f502d50e 6746 }
b17dc4d4
RB
6747 unsigned int group_el = 0;
6748 unsigned HOST_WIDE_INT
6749 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6750 for (j = 0; j < ncopies; j++)
6751 {
c3a8f964 6752 /* We've set op and dt above, from vect_get_store_rhs,
6753 and first_stmt == stmt. */
6754 if (j == 0)
6755 {
6756 if (slp)
6757 {
6758 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6759 slp_node);
f502d50e
MM
6760 vec_oprnd = vec_oprnds[0];
6761 }
6762 else
6763 {
c3a8f964 6764 op = vect_get_store_rhs (next_stmt);
81c40241 6765 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6766 }
6767 }
f2e2a985 6768 else
f502d50e
MM
6769 {
6770 if (slp)
6771 vec_oprnd = vec_oprnds[j];
6772 else
c079cbac 6773 {
929b4411
RS
6774 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6775 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6776 vec_oprnd);
c079cbac 6777 }
f502d50e 6778 }
04199738
RB
6779 /* Pun the vector to extract from if necessary. */
6780 if (lvectype != vectype)
6781 {
6782 tree tem = make_ssa_name (lvectype);
6783 gimple *pun
6784 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6785 lvectype, vec_oprnd));
6786 vect_finish_stmt_generation (stmt, pun, gsi);
6787 vec_oprnd = tem;
6788 }
f502d50e
MM
6789 for (i = 0; i < nstores; i++)
6790 {
6791 tree newref, newoff;
355fe088 6792 gimple *incr, *assign;
f502d50e
MM
6793 tree size = TYPE_SIZE (ltype);
6794 /* Extract the i'th component. */
6795 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6796 bitsize_int (i), size);
6797 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6798 size, pos);
6799
6800 elem = force_gimple_operand_gsi (gsi, elem, true,
6801 NULL_TREE, true,
6802 GSI_SAME_STMT);
6803
b17dc4d4
RB
6804 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6805 group_el * elsz);
f502d50e 6806 newref = build2 (MEM_REF, ltype,
b17dc4d4 6807 running_off, this_off);
19986382 6808 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6809
6810 /* And store it to *running_off. */
6811 assign = gimple_build_assign (newref, elem);
6812 vect_finish_stmt_generation (stmt, assign, gsi);
6813
b17dc4d4
RB
6814 group_el += lnel;
6815 if (! slp
6816 || group_el == group_size)
6817 {
6818 newoff = copy_ssa_name (running_off, NULL);
6819 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6820 running_off, stride_step);
6821 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6822
b17dc4d4
RB
6823 running_off = newoff;
6824 group_el = 0;
6825 }
225ce44b
RB
6826 if (g == group_size - 1
6827 && !slp)
f502d50e
MM
6828 {
6829 if (j == 0 && i == 0)
225ce44b
RB
6830 STMT_VINFO_VEC_STMT (stmt_info)
6831 = *vec_stmt = assign;
f502d50e
MM
6832 else
6833 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6834 prev_stmt_info = vinfo_for_stmt (assign);
6835 }
6836 }
f2e2a985 6837 }
2c53b149 6838 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6839 if (slp)
6840 break;
f2e2a985 6841 }
778dd3b6
RB
6842
6843 vec_oprnds.release ();
f2e2a985
MM
6844 return true;
6845 }
6846
8c681247 6847 auto_vec<tree> dr_chain (group_size);
9771b263 6848 oprnds.create (group_size);
ebfd146a 6849
720f5239 6850 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6851 gcc_assert (alignment_support_scheme);
70088b95
RS
6852 vec_loop_masks *loop_masks
6853 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6854 ? &LOOP_VINFO_MASKS (loop_vinfo)
6855 : NULL);
272c6793 6856 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6857 realignment. vect_supportable_dr_alignment always returns either
6858 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6859 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6860 && !mask
70088b95 6861 && !loop_masks)
272c6793
RS
6862 || alignment_support_scheme == dr_aligned
6863 || alignment_support_scheme == dr_unaligned_supported);
6864
62da9e14
RS
6865 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6866 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6867 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6868
f307441a
RS
6869 tree bump;
6870 tree vec_offset = NULL_TREE;
6871 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6872 {
6873 aggr_type = NULL_TREE;
6874 bump = NULL_TREE;
6875 }
6876 else if (memory_access_type == VMAT_GATHER_SCATTER)
6877 {
6878 aggr_type = elem_type;
6879 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6880 &bump, &vec_offset);
6881 }
272c6793 6882 else
f307441a
RS
6883 {
6884 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6885 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6886 else
6887 aggr_type = vectype;
6888 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6889 }
ebfd146a 6890
c3a8f964
RS
6891 if (mask)
6892 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6893
6894  /* In case the vectorization factor (VF) is bigger than the number
6895     of elements that we can fit in a vectype (nunits), we have to generate
6896     more than one vector stmt - i.e., we need to "unroll" the
6897     vector stmt by a factor VF/nunits.  For more details see documentation in
6898     vect_get_vec_def_for_copy_stmt.  */
6899
6900  /* In case of interleaving (non-unit grouped access):
6901
6902        S1:  &base + 2 = x2
6903        S2:  &base = x0
6904        S3:  &base + 1 = x1
6905        S4:  &base + 3 = x3
6906
6907     We create vectorized stores starting from the base address (the access
6908     of the first stmt in the chain, S2 in the above example) when the last
6909     store stmt of the chain (S4) is reached:
6910
6911        VS1: &base = vx2
6912        VS2: &base + vec_size*1 = vx0
6913        VS3: &base + vec_size*2 = vx1
6914        VS4: &base + vec_size*3 = vx3
6915
6916     Then permutation statements are generated:
6917
6918        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6919        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6920        ...
6921
6922     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6923     (the order of the data-refs in the output of vect_permute_store_chain
6924     corresponds to the order of scalar stmts in the interleaving chain - see
6925     the documentation of vect_permute_store_chain()).
6926
6927     In case of both multiple types and interleaving, above vector stores and
6928     permutation stmts are created for every copy.  The result vector stmts
6929     are put in STMT_VINFO_VEC_STMT for the first copy and in the
6930     corresponding STMT_VINFO_RELATED_STMT for the next copies.
6931  */
6932
6933 prev_stmt_info = NULL;
c3a8f964 6934 tree vec_mask = NULL_TREE;
ebfd146a
IR
6935 for (j = 0; j < ncopies; j++)
6936 {
ebfd146a
IR
6937
6938 if (j == 0)
6939 {
6940 if (slp)
6941 {
6942 /* Get vectorized arguments for SLP_NODE. */
d092494c 6943 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6944 NULL, slp_node);
ebfd146a 6945
9771b263 6946 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6947 }
6948 else
6949 {
6950 /* For interleaved stores we collect vectorized defs for all the
6951 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6952 used as an input to vect_permute_store_chain(), and OPRNDS as
6953 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6954
2c53b149 6955 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6956 OPRNDS are of size 1. */
b8698a0f 6957 next_stmt = first_stmt;
ebfd146a
IR
6958 for (i = 0; i < group_size; i++)
6959 {
b8698a0f 6960 /* Since gaps are not supported for interleaved stores,
2c53b149 6961 DR_GROUP_SIZE is the exact number of stmts in the chain.
b8698a0f 6962 Therefore, NEXT_STMT can't be NULL_TREE. In case that
2c53b149 6963 there is no interleaving, DR_GROUP_SIZE is 1, and only one
ebfd146a 6964 iteration of the loop will be executed. */
c3a8f964 6965 op = vect_get_store_rhs (next_stmt);
81c40241 6966 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6967 dr_chain.quick_push (vec_oprnd);
6968 oprnds.quick_push (vec_oprnd);
2c53b149 6969 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6970 }
c3a8f964
RS
6971 if (mask)
6972 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6973 mask_vectype);
ebfd146a
IR
6974 }
6975
6976	  /* We should have caught mismatched types earlier.  */
6977 gcc_assert (useless_type_conversion_p (vectype,
6978 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6979 bool simd_lane_access_p
6980 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6981 if (simd_lane_access_p
6982 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6983 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6984 && integer_zerop (DR_OFFSET (first_dr))
6985 && integer_zerop (DR_INIT (first_dr))
6986 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6987 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6988 {
6989 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6990 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6991 inv_p = false;
74bf76ed 6992 }
f307441a
RS
6993 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6994 {
6995 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6996 &dataref_ptr, &vec_offset);
6997 inv_p = false;
6998 }
74bf76ed
JJ
6999 else
7000 dataref_ptr
7001 = vect_create_data_ref_ptr (first_stmt, aggr_type,
7002 simd_lane_access_p ? loop : NULL,
09dfa495 7003 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
7004 simd_lane_access_p, &inv_p,
7005 NULL_TREE, bump);
a70d6342 7006 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 7007 }
b8698a0f 7008 else
ebfd146a 7009 {
b8698a0f
L
7010 /* For interleaved stores we created vectorized defs for all the
7011 defs stored in OPRNDS in the previous iteration (previous copy).
7012 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7013 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7014 next copy.
2c53b149 7015 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7016 OPRNDS are of size 1. */
7017 for (i = 0; i < group_size; i++)
7018 {
9771b263 7019 op = oprnds[i];
929b4411
RS
7020 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
7021 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
7022 dr_chain[i] = vec_oprnd;
7023 oprnds[i] = vec_oprnd;
ebfd146a 7024 }
c3a8f964 7025 if (mask)
929b4411 7026 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
7027 if (dataref_offset)
7028 dataref_offset
7029 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7030 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7031 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7032 vec_offset);
74bf76ed
JJ
7033 else
7034 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7035 bump);
ebfd146a
IR
7036 }
7037
2de001ee 7038 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7039 {
272c6793 7040 tree vec_array;
267d3070 7041
3ba4ff41 7042 /* Get an array into which we can store the individual vectors. */
272c6793 7043 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7044
7045 /* Invalidate the current contents of VEC_ARRAY. This should
7046 become an RTL clobber too, which prevents the vector registers
7047 from being upward-exposed. */
7048 vect_clobber_variable (stmt, gsi, vec_array);
7049
7050 /* Store the individual vectors into the array. */
272c6793 7051 for (i = 0; i < vec_num; i++)
c2d7ab2a 7052 {
9771b263 7053 vec_oprnd = dr_chain[i];
272c6793 7054 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7055 }
b8698a0f 7056
7cfb4d93 7057 tree final_mask = NULL;
70088b95
RS
7058 if (loop_masks)
7059 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7060 vectype, j);
7cfb4d93
RS
7061 if (vec_mask)
7062 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7063 vec_mask, gsi);
7064
7e11fc7f 7065 gcall *call;
7cfb4d93 7066 if (final_mask)
7e11fc7f
RS
7067 {
7068 /* Emit:
7069 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7070 VEC_ARRAY). */
7071 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7072 tree alias_ptr = build_int_cst (ref_type, align);
7073 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7074 dataref_ptr, alias_ptr,
7cfb4d93 7075 final_mask, vec_array);
7e11fc7f
RS
7076 }
7077 else
7078 {
7079 /* Emit:
7080 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7081 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7082 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7083 vec_array);
7084 gimple_call_set_lhs (call, data_ref);
7085 }
a844293d
RS
7086 gimple_call_set_nothrow (call, true);
7087 new_stmt = call;
267d3070 7088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41
RS
7089
7090 /* Record that VEC_ARRAY is now dead. */
7091 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7092 }
7093 else
7094 {
7095 new_stmt = NULL;
0d0293ac 7096 if (grouped_store)
272c6793 7097 {
b6b9227d
JJ
7098 if (j == 0)
7099 result_chain.create (group_size);
272c6793
RS
7100 /* Permute. */
7101 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7102 &result_chain);
7103 }
c2d7ab2a 7104
272c6793
RS
7105 next_stmt = first_stmt;
7106 for (i = 0; i < vec_num; i++)
7107 {
644ffefd 7108 unsigned align, misalign;
272c6793 7109
7cfb4d93 7110 tree final_mask = NULL_TREE;
70088b95
RS
7111 if (loop_masks)
7112 final_mask = vect_get_loop_mask (gsi, loop_masks,
7113 vec_num * ncopies,
7cfb4d93
RS
7114 vectype, vec_num * j + i);
7115 if (vec_mask)
7116 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7117 vec_mask, gsi);
7118
f307441a
RS
7119 if (memory_access_type == VMAT_GATHER_SCATTER)
7120 {
7121 tree scale = size_int (gs_info.scale);
7122 gcall *call;
70088b95 7123 if (loop_masks)
f307441a
RS
7124 call = gimple_build_call_internal
7125 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7126 scale, vec_oprnd, final_mask);
7127 else
7128 call = gimple_build_call_internal
7129 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7130 scale, vec_oprnd);
7131 gimple_call_set_nothrow (call, true);
7132 new_stmt = call;
7133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7134 break;
7135 }
7136
272c6793
RS
7137 if (i > 0)
7138 /* Bump the vector pointer. */
7139 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7140 stmt, bump);
272c6793
RS
7141
7142 if (slp)
9771b263 7143 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7144 else if (grouped_store)
7145 /* For grouped stores vectorized defs are interleaved in
272c6793 7146 vect_permute_store_chain(). */
9771b263 7147 vec_oprnd = result_chain[i];
272c6793 7148
f702e7d4 7149 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7150 if (aligned_access_p (first_dr))
644ffefd 7151 misalign = 0;
272c6793
RS
7152 else if (DR_MISALIGNMENT (first_dr) == -1)
7153 {
25f68d90 7154 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7155 misalign = 0;
272c6793
RS
7156 }
7157 else
c3a8f964 7158 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7159 if (dataref_offset == NULL_TREE
7160 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7161 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7162 misalign);
c2d7ab2a 7163
62da9e14 7164 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7165 {
7166 tree perm_mask = perm_mask_for_reverse (vectype);
7167 tree perm_dest
c3a8f964 7168 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7169 vectype);
b731b390 7170 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7171
7172 /* Generate the permute statement. */
355fe088 7173 gimple *perm_stmt
0d0e4a03
JJ
7174 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7175 vec_oprnd, perm_mask);
09dfa495
BM
7176 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7177
7178 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7179 vec_oprnd = new_temp;
7180 }
7181
272c6793 7182 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7183 if (final_mask)
c3a8f964
RS
7184 {
7185 align = least_bit_hwi (misalign | align);
7186 tree ptr = build_int_cst (ref_type, align);
7187 gcall *call
7188 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7189 dataref_ptr, ptr,
7cfb4d93 7190 final_mask, vec_oprnd);
c3a8f964
RS
7191 gimple_call_set_nothrow (call, true);
7192 new_stmt = call;
7193 }
7194 else
7195 {
7196 data_ref = fold_build2 (MEM_REF, vectype,
7197 dataref_ptr,
7198 dataref_offset
7199 ? dataref_offset
7200 : build_int_cst (ref_type, 0));
7201 if (aligned_access_p (first_dr))
7202 ;
7203 else if (DR_MISALIGNMENT (first_dr) == -1)
7204 TREE_TYPE (data_ref)
7205 = build_aligned_type (TREE_TYPE (data_ref),
7206 align * BITS_PER_UNIT);
7207 else
7208 TREE_TYPE (data_ref)
7209 = build_aligned_type (TREE_TYPE (data_ref),
7210 TYPE_ALIGN (elem_type));
19986382 7211 vect_copy_ref_info (data_ref, DR_REF (first_dr));
c3a8f964
RS
7212 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7213 }
272c6793 7214 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
7215
7216 if (slp)
7217 continue;
7218
2c53b149 7219 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
7220 if (!next_stmt)
7221 break;
7222 }
ebfd146a 7223 }
1da0876c
RS
7224 if (!slp)
7225 {
7226 if (j == 0)
7227 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7228 else
7229 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7230 prev_stmt_info = vinfo_for_stmt (new_stmt);
7231 }
ebfd146a
IR
7232 }
7233
9771b263
DN
7234 oprnds.release ();
7235 result_chain.release ();
7236 vec_oprnds.release ();
ebfd146a
IR
7237
7238 return true;
7239}
7240
7241/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7242 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7243 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7244 vect_gen_perm_mask_checked. */
a1e53f3f 7245
3fcc1b55 7246tree
4aae3cb3 7247vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7248{
b00cb3bf 7249 tree mask_type;
a1e53f3f 7250
0ecc2b7d
RS
7251 poly_uint64 nunits = sel.length ();
7252 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7253
7254 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7255 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7256}
7257
7ac7e286 7258/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7259 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7260
7261tree
4aae3cb3 7262vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7263{
7ac7e286 7264 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7265 return vect_gen_perm_mask_any (vectype, sel);
7266}
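/* For example, a reversal mask for VECTYPE could be built along these lines
   (an illustrative sketch only, assuming a fixed-length vector and mirroring
   the way the scatter code above builds its masks):

     vec_perm_builder sel (nunits, nunits, 1);
     for (unsigned int i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   which yields the VECTOR_CST { nunits-1, ..., 1, 0 } for use in a
   VEC_PERM_EXPR.  */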
7267
7268/* Given vector variables X and Y that were generated for the scalar
7269   STMT, generate instructions to permute the vector elements of X and Y
7270   using permutation mask MASK_VEC, insert them at *GSI and return the
7271   permuted vector variable.  */
a1e53f3f
L
7272
7273static tree
355fe088 7274permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7275 gimple_stmt_iterator *gsi)
a1e53f3f
L
7276{
7277 tree vectype = TREE_TYPE (x);
aec7ae7d 7278 tree perm_dest, data_ref;
355fe088 7279 gimple *perm_stmt;
a1e53f3f 7280
7ad429a4
RS
7281 tree scalar_dest = gimple_get_lhs (stmt);
7282 if (TREE_CODE (scalar_dest) == SSA_NAME)
7283 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7284 else
7285 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7286 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7287
7288 /* Generate the permute statement. */
0d0e4a03 7289 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7290 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7291
7292 return data_ref;
7293}
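/* For instance (illustrative only), with 4-element vectors X and Y and
   MASK_VEC = { 0, 4, 1, 5 }, the statement emitted above is

     perm_dest_N = VEC_PERM_EXPR <x, y, { 0, 4, 1, 5 }>;

   which interleaves the low halves of X and Y, analogous to the VS5/VS6
   example in the interleaving comment in vectorizable_store.  */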
7294
7295/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7296   inserting them on the loop's preheader edge.  Returns true if we
7297   were successful in doing so (and thus STMT can be moved then),
7298   otherwise returns false.  */
7299
7300static bool
355fe088 7301hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7302{
7303 ssa_op_iter i;
7304 tree op;
7305 bool any = false;
7306
7307 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7308 {
355fe088 7309 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7310 if (!gimple_nop_p (def_stmt)
7311 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7312 {
7313 /* Make sure we don't need to recurse. While we could do
7314 so in simple cases when there are more complex use webs
7315 we don't have an easy way to preserve stmt order to fulfil
7316 dependencies within them. */
7317 tree op2;
7318 ssa_op_iter i2;
d1417442
JJ
7319 if (gimple_code (def_stmt) == GIMPLE_PHI)
7320 return false;
6b916b36
RB
7321 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7322 {
355fe088 7323 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7324 if (!gimple_nop_p (def_stmt2)
7325 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7326 return false;
7327 }
7328 any = true;
7329 }
7330 }
7331
7332 if (!any)
7333 return true;
7334
7335 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7336 {
355fe088 7337 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7338 if (!gimple_nop_p (def_stmt)
7339 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7340 {
7341 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7342 gsi_remove (&gsi, false);
7343 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7344 }
7345 }
7346
7347 return true;
7348}
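/* As an illustrative example (the names below are hypothetical), for an
   invariant load such as

     p_2 = &a[0] + off_1;    (off_1 defined outside LOOP)
     x_3 = *p_2;             (STMT, invariant in LOOP)

   the definition of p_2 is moved to the loop preheader, after which the
   caller can hoist STMT itself as well.  */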
7349
7350/* vectorizable_load.
7351
7352   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7353   can be vectorized.
7354   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7355   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7356   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7357
7358static bool
355fe088 7359vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7360 slp_tree slp_node, slp_instance slp_node_instance,
7361 stmt_vector_for_cost *cost_vec)
7362{
7363 tree scalar_dest;
7364 tree vec_dest = NULL;
7365 tree data_ref = NULL;
7366 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7367 stmt_vec_info prev_stmt_info;
ebfd146a 7368 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7369 struct loop *loop = NULL;
ebfd146a 7370 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7371 bool nested_in_vect_loop = false;
c716e67f 7372 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7373 tree elem_type;
ebfd146a 7374 tree new_temp;
ef4bddc2 7375 machine_mode mode;
355fe088 7376 gimple *new_stmt = NULL;
7377 tree dummy;
7378 enum dr_alignment_support alignment_support_scheme;
7379 tree dataref_ptr = NULL_TREE;
74bf76ed 7380 tree dataref_offset = NULL_TREE;
355fe088 7381 gimple *ptr_incr = NULL;
ebfd146a 7382 int ncopies;
7383 int i, j;
7384 unsigned int group_size;
7385 poly_uint64 group_gap_adj;
7386 tree msq = NULL_TREE, lsq;
7387 tree offset = NULL_TREE;
356bbc4c 7388 tree byte_offset = NULL_TREE;
ebfd146a 7389 tree realignment_token = NULL_TREE;
538dd0b7 7390 gphi *phi = NULL;
6e1aa848 7391 vec<tree> dr_chain = vNULL;
0d0293ac 7392 bool grouped_load = false;
355fe088 7393 gimple *first_stmt;
4f0a0218 7394 gimple *first_stmt_for_drptr = NULL;
7395 bool inv_p;
7396 bool compute_in_loop = false;
7397 struct loop *at_loop;
7398 int vec_num;
7399 bool slp = (slp_node != NULL);
7400 bool slp_perm = false;
a70d6342 7401 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7402 poly_uint64 vf;
272c6793 7403 tree aggr_type;
134c85ca 7404 gather_scatter_info gs_info;
310213d4 7405 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7406 tree ref_type;
929b4411 7407 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7408
465c8c19
JJ
7409 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7410 return false;
7411
66c16fd9
RB
7412 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7413 && ! vec_stmt)
465c8c19
JJ
7414 return false;
7415
c3a8f964
RS
7416 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7417 if (is_gimple_assign (stmt))
7418 {
7419 scalar_dest = gimple_assign_lhs (stmt);
7420 if (TREE_CODE (scalar_dest) != SSA_NAME)
7421 return false;
465c8c19 7422
c3a8f964
RS
7423 tree_code code = gimple_assign_rhs_code (stmt);
7424 if (code != ARRAY_REF
7425 && code != BIT_FIELD_REF
7426 && code != INDIRECT_REF
7427 && code != COMPONENT_REF
7428 && code != IMAGPART_EXPR
7429 && code != REALPART_EXPR
7430 && code != MEM_REF
7431 && TREE_CODE_CLASS (code) != tcc_declaration)
7432 return false;
7433 }
7434 else
7435 {
7436 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7437 if (!call || !gimple_call_internal_p (call))
7438 return false;
7439
7440 internal_fn ifn = gimple_call_internal_fn (call);
7441 if (!internal_load_fn_p (ifn))
c3a8f964 7442 return false;
465c8c19 7443
c3a8f964
RS
7444 scalar_dest = gimple_call_lhs (call);
7445 if (!scalar_dest)
7446 return false;
7447
7448 if (slp_node != NULL)
7449 {
7450 if (dump_enabled_p ())
7451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7452 "SLP of masked loads not supported.\n");
7453 return false;
7454 }
7455
bfaa08b7
RS
7456 int mask_index = internal_fn_mask_index (ifn);
7457 if (mask_index >= 0)
7458 {
7459 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7460 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7461 &mask_vectype))
bfaa08b7
RS
7462 return false;
7463 }
c3a8f964 7464 }
465c8c19
JJ
7465
7466 if (!STMT_VINFO_DATA_REF (stmt_info))
7467 return false;
7468
7469 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7470 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7471
a70d6342
IR
7472 if (loop_vinfo)
7473 {
7474 loop = LOOP_VINFO_LOOP (loop_vinfo);
7475 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7476 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7477 }
7478 else
3533e503 7479 vf = 1;
ebfd146a
IR
7480
7481 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7482 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7483 case of SLP. */
fce57248 7484 if (slp)
ebfd146a
IR
7485 ncopies = 1;
7486 else
e8f142e2 7487 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7488
7489 gcc_assert (ncopies >= 1);
7490
7491 /* FORNOW. This restriction should be relaxed. */
7492 if (nested_in_vect_loop && ncopies > 1)
7493 {
73fbfcad 7494 if (dump_enabled_p ())
78c60e3d 7495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7496 "multiple types in nested loop.\n");
ebfd146a
IR
7497 return false;
7498 }
7499
f2556b68
RB
7500 /* Invalidate assumptions made by dependence analysis when vectorization
7501 on the unrolled body effectively re-orders stmts. */
7502 if (ncopies > 1
7503 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7504 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7505 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7506 {
7507 if (dump_enabled_p ())
7508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7509 "cannot perform implicit CSE when unrolling "
7510 "with negative dependence distance\n");
7511 return false;
7512 }
7513
7b7b1813 7514 elem_type = TREE_TYPE (vectype);
947131ba 7515 mode = TYPE_MODE (vectype);
ebfd146a
IR
7516
7517 /* FORNOW. In some cases can vectorize even if data-type not supported
7518 (e.g. - data copies). */
947131ba 7519 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7520 {
73fbfcad 7521 if (dump_enabled_p ())
78c60e3d 7522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7523 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7524 return false;
7525 }
7526
ebfd146a 7527 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7528 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7529 {
0d0293ac 7530 grouped_load = true;
ebfd146a 7531 /* FORNOW */
2de001ee
RS
7532 gcc_assert (!nested_in_vect_loop);
7533 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7534
2c53b149
RB
7535 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7536 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7537
b1af7da6
RB
7538 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7539 slp_perm = true;
7540
f2556b68
RB
7541 /* Invalidate assumptions made by dependence analysis when vectorization
7542 on the unrolled body effectively re-orders stmts. */
7543 if (!PURE_SLP_STMT (stmt_info)
7544 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7545 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7546 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7547 {
7548 if (dump_enabled_p ())
7549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7550 "cannot perform implicit CSE when performing "
7551 "group loads with negative dependence distance\n");
7552 return false;
7553 }
96bb56b2
RB
7554
7555 /* Similarly when the stmt is a load that is both part of a SLP
7556 instance and a loop vectorized stmt via the same-dr mechanism
7557 we have to give up. */
2c53b149 7558 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2
RB
7559 && (STMT_SLP_TYPE (stmt_info)
7560 != STMT_SLP_TYPE (vinfo_for_stmt
2c53b149 7561 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
96bb56b2
RB
7562 {
7563 if (dump_enabled_p ())
7564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7565 "conflicting SLP types for CSEd load\n");
7566 return false;
7567 }
ebfd146a 7568 }
7cfb4d93
RS
7569 else
7570 group_size = 1;
ebfd146a 7571
2de001ee 7572 vect_memory_access_type memory_access_type;
7e11fc7f 7573 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7574 &memory_access_type, &gs_info))
7575 return false;
a1e53f3f 7576
c3a8f964
RS
7577 if (mask)
7578 {
7579 if (memory_access_type == VMAT_CONTIGUOUS)
7580 {
7e11fc7f
RS
7581 machine_mode vec_mode = TYPE_MODE (vectype);
7582 if (!VECTOR_MODE_P (vec_mode)
7583 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7584 TYPE_MODE (mask_vectype), true))
7585 return false;
7586 }
bfaa08b7 7587 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7588 {
7589 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7590 tree masktype
7591 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7592 if (TREE_CODE (masktype) == INTEGER_TYPE)
7593 {
7594 if (dump_enabled_p ())
7595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7596 "masked gather with integer mask not"
7597 " supported.");
7598 return false;
7599 }
7600 }
bfaa08b7
RS
7601 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7602 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7603 {
7604 if (dump_enabled_p ())
7605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7606 "unsupported access type for masked load.\n");
7607 return false;
7608 }
7609 }
7610
ebfd146a
IR
7611 if (!vec_stmt) /* transformation not required. */
7612 {
2de001ee
RS
7613 if (!slp)
7614 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7615
7616 if (loop_vinfo
7617 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7618 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7619 memory_access_type, &gs_info);
7cfb4d93 7620
ebfd146a 7621 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7622 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7623 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7624 return true;
7625 }
7626
2de001ee
RS
7627 if (!slp)
7628 gcc_assert (memory_access_type
7629 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7630
73fbfcad 7631 if (dump_enabled_p ())
78c60e3d 7632 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7633 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7634
67b8dbac 7635 /* Transform. */
ebfd146a 7636
f702e7d4 7637 ensure_base_align (dr);
c716e67f 7638
bfaa08b7 7639 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7640 {
929b4411
RS
7641 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7642 mask_dt);
aec7ae7d
JJ
7643 return true;
7644 }
2de001ee
RS
7645
7646 if (memory_access_type == VMAT_ELEMENTWISE
7647 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7648 {
7649 gimple_stmt_iterator incr_gsi;
7650 bool insert_after;
355fe088 7651 gimple *incr;
7d75abc8 7652 tree offvar;
7d75abc8
MM
7653 tree ivstep;
7654 tree running_off;
9771b263 7655 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7656 tree stride_base, stride_step, alias_off;
4d694b27
RS
7657 /* Checked by get_load_store_type. */
7658 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7659 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7660
7cfb4d93 7661 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7662 gcc_assert (!nested_in_vect_loop);
7d75abc8 7663
b210f45f 7664 if (grouped_load)
44fc7854 7665 {
2c53b149 7666 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7667 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7668 }
ab313a8c 7669 else
44fc7854
BE
7670 {
7671 first_stmt = stmt;
7672 first_dr = dr;
b210f45f
RB
7673 }
7674 if (slp && grouped_load)
7675 {
2c53b149 7676 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
b210f45f
RB
7677 ref_type = get_group_alias_ptr_type (first_stmt);
7678 }
7679 else
7680 {
7681 if (grouped_load)
7682 cst_offset
7683 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7684 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7685 group_size = 1;
b210f45f 7686 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7687 }
ab313a8c 7688
14ac6aa2
RB
7689 stride_base
7690 = fold_build_pointer_plus
ab313a8c 7691 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7692 size_binop (PLUS_EXPR,
ab313a8c
RB
7693 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7694 convert_to_ptrofftype (DR_INIT (first_dr))));
7695 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7696
7697 /* For a load with loop-invariant (but other than power-of-2)
7698 stride (i.e. not a grouped access) like so:
7699
7700 for (i = 0; i < n; i += stride)
7701 ... = array[i];
7702
7703 we generate a new induction variable and new accesses to
7704 form a new vector (or vectors, depending on ncopies):
7705
7706 for (j = 0; ; j += VF*stride)
7707 tmp1 = array[j];
7708 tmp2 = array[j + stride];
7709 ...
7710 vectemp = {tmp1, tmp2, ...}
7711 */
7712
ab313a8c
RB
7713 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7714 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7715
7716 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7717
b210f45f
RB
7718 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7719 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7720 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7721 loop, &incr_gsi, insert_after,
7722 &offvar, NULL);
7723 incr = gsi_stmt (incr_gsi);
310213d4 7724 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7725
b210f45f 7726 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7727
7728 prev_stmt_info = NULL;
7729 running_off = offvar;
44fc7854 7730 alias_off = build_int_cst (ref_type, 0);
4d694b27 7731 int nloads = const_nunits;
e09b4c37 7732 int lnel = 1;
7b5fc413 7733 tree ltype = TREE_TYPE (vectype);
ea60dd34 7734 tree lvectype = vectype;
b266b968 7735 auto_vec<tree> dr_chain;
2de001ee 7736 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7737 {
4d694b27 7738 if (group_size < const_nunits)
e09b4c37 7739 {
ff03930a
JJ
7740 /* First check if vec_init optab supports construction from
7741 vector elts directly. */
b397965c 7742 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7743 machine_mode vmode;
7744 if (mode_for_vector (elmode, group_size).exists (&vmode)
7745 && VECTOR_MODE_P (vmode)
414fef4e 7746 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7747 && (convert_optab_handler (vec_init_optab,
7748 TYPE_MODE (vectype), vmode)
7749 != CODE_FOR_nothing))
ea60dd34 7750 {
4d694b27 7751 nloads = const_nunits / group_size;
ea60dd34 7752 lnel = group_size;
ff03930a
JJ
7753 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7754 }
7755 else
7756 {
7757 /* Otherwise avoid emitting a constructor of vector elements
7758 by performing the loads using an integer type of the same
7759 size, constructing a vector of those and then
7760 re-interpreting it as the original vector type.
7761 This avoids a huge runtime penalty due to the general
7762 inability to perform store forwarding from smaller stores
7763 to a larger load. */
7764 unsigned lsize
7765 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7766 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7767 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7768 /* If we can't construct such a vector fall back to
7769 element loads of the original vector type. */
4d694b27 7770 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7771 && VECTOR_MODE_P (vmode)
414fef4e 7772 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7773 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7774 != CODE_FOR_nothing))
7775 {
4d694b27 7776 nloads = lnunits;
ff03930a
JJ
7777 lnel = group_size;
7778 ltype = build_nonstandard_integer_type (lsize, 1);
7779 lvectype = build_vector_type (ltype, nloads);
7780 }
ea60dd34 7781 }
e09b4c37 7782 }
2de001ee 7783 else
e09b4c37 7784 {
ea60dd34 7785 nloads = 1;
4d694b27 7786 lnel = const_nunits;
e09b4c37 7787 ltype = vectype;
e09b4c37 7788 }
2de001ee
RS
7789 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7790 }
bb4e4747
BC
7791 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7792 else if (nloads == 1)
7793 ltype = vectype;
7794
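      /* A sketch of the vec_init fallback above (assuming a V8HI vectype and
	 GROUP_SIZE == 2, an example not spelled out in the original comment):
	 if a V8HI cannot be built directly from V2HI pieces, LSIZE is 32 and
	 the group is instead loaded as four SImode values, assembled into a
	 V4SI constructor and then VIEW_CONVERTed back to V8HI, avoiding the
	 store-forwarding penalty described above.  */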
2de001ee
RS
7795 if (slp)
7796 {
66c16fd9
RB
7797 /* For SLP permutation support we need to load the whole group,
7798 not only the number of vector stmts the permutation result
7799 fits in. */
b266b968 7800 if (slp_perm)
66c16fd9 7801 {
d9f21f6a
RS
7802 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7803 variable VF. */
7804 unsigned int const_vf = vf.to_constant ();
4d694b27 7805 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7806 dr_chain.create (ncopies);
7807 }
7808 else
7809 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7810 }
4d694b27 7811 unsigned int group_el = 0;
e09b4c37
RB
7812 unsigned HOST_WIDE_INT
7813 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7814 for (j = 0; j < ncopies; j++)
7815 {
7b5fc413 7816 if (nloads > 1)
e09b4c37
RB
7817 vec_alloc (v, nloads);
7818 for (i = 0; i < nloads; i++)
7b5fc413 7819 {
e09b4c37 7820 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7821 group_el * elsz + cst_offset);
19986382
RB
7822 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7823 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7824 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7825 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7826 if (nloads > 1)
7827 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7828 gimple_assign_lhs (new_stmt));
7829
7830 group_el += lnel;
7831 if (! slp
7832 || group_el == group_size)
7b5fc413 7833 {
e09b4c37
RB
7834 tree newoff = copy_ssa_name (running_off);
7835 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7836 running_off, stride_step);
7b5fc413
RB
7837 vect_finish_stmt_generation (stmt, incr, gsi);
7838
7839 running_off = newoff;
e09b4c37 7840 group_el = 0;
7b5fc413 7841 }
7b5fc413 7842 }
e09b4c37 7843 if (nloads > 1)
7d75abc8 7844 {
ea60dd34
RB
7845 tree vec_inv = build_constructor (lvectype, v);
7846 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7847 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7848 if (lvectype != vectype)
7849 {
7850 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7851 VIEW_CONVERT_EXPR,
7852 build1 (VIEW_CONVERT_EXPR,
7853 vectype, new_temp));
7854 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7855 }
7d75abc8
MM
7856 }
7857
7b5fc413 7858 if (slp)
b266b968 7859 {
b266b968
RB
7860 if (slp_perm)
7861 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7862 else
7863 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7864 }
7d75abc8 7865 else
225ce44b
RB
7866 {
7867 if (j == 0)
7868 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7869 else
7870 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7871 prev_stmt_info = vinfo_for_stmt (new_stmt);
7872 }
7d75abc8 7873 }
b266b968 7874 if (slp_perm)
29afecdf
RB
7875 {
7876 unsigned n_perms;
7877 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7878 slp_node_instance, false, &n_perms);
7879 }
7d75abc8
MM
7880 return true;
7881 }
aec7ae7d 7882
b5ec4de7
RS
7883 if (memory_access_type == VMAT_GATHER_SCATTER
7884 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7885 grouped_load = false;
7886
0d0293ac 7887 if (grouped_load)
ebfd146a 7888 {
2c53b149
RB
7889 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7890 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7891 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7892 without permutation. */
7893 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7894 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7895 /* For BB vectorization always use the first stmt to base
7896 the data ref pointer on. */
7897 if (bb_vinfo)
7898 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7899
ebfd146a 7900 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7901 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7902 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7903 ??? But we can only do so if there is exactly one
7904 as we have no way to get at the rest. Leave the CSE
7905 opportunity alone.
7906 ??? With the group load eventually participating
7907 in multiple different permutations (having multiple
7908 slp nodes which refer to the same group) the CSE
7909 is even wrong code. See PR56270. */
7910 && !slp)
ebfd146a
IR
7911 {
7912 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7913 return true;
7914 }
7915 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7916 group_gap_adj = 0;
ebfd146a
IR
7917
7918 /* VEC_NUM is the number of vect stmts to be created for this group. */
7919 if (slp)
7920 {
0d0293ac 7921 grouped_load = false;
91ff1504
RB
7922 /* For SLP permutation support we need to load the whole group,
7923 not only the number of vector stmts the permutation result
7924 fits in. */
7925 if (slp_perm)
b267968e 7926 {
d9f21f6a
RS
7927 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7928 variable VF. */
7929 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7930 unsigned int const_nunits = nunits.to_constant ();
7931 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7932 group_gap_adj = vf * group_size - nunits * vec_num;
7933 }
91ff1504 7934 else
b267968e
RB
7935 {
7936 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7937 group_gap_adj
7938 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7939 }
a70d6342 7940 }
ebfd146a 7941 else
9b999e8c 7942 vec_num = group_size;
44fc7854
BE
7943
7944 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7945 }
7946 else
7947 {
7948 first_stmt = stmt;
7949 first_dr = dr;
7950 group_size = vec_num = 1;
9b999e8c 7951 group_gap_adj = 0;
44fc7854 7952 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7953 }
7954
720f5239 7955 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7956 gcc_assert (alignment_support_scheme);
70088b95
RS
7957 vec_loop_masks *loop_masks
7958 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7959 ? &LOOP_VINFO_MASKS (loop_vinfo)
7960 : NULL);
7cfb4d93
RS
7961 /* Targets with store-lane instructions must not require explicit
7962 realignment. vect_supportable_dr_alignment always returns either
7963 dr_aligned or dr_unaligned_supported for masked operations. */
7964 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7965 && !mask
70088b95 7966 && !loop_masks)
272c6793
RS
7967 || alignment_support_scheme == dr_aligned
7968 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7969
7970 /* In case the vectorization factor (VF) is bigger than the number
7971 of elements that we can fit in a vectype (nunits), we have to generate
 7972 more than one vector stmt - i.e. - we need to "unroll" the
ff802fa1 7973 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7974 from one copy of the vector stmt to the next, in the field
ff802fa1 7975 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7976 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7977 stmts that use the defs of the current stmt. The example below
7978 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7979 need to create 4 vectorized stmts):
ebfd146a
IR
7980
7981 before vectorization:
7982 RELATED_STMT VEC_STMT
7983 S1: x = memref - -
7984 S2: z = x + 1 - -
7985
7986 step 1: vectorize stmt S1:
7987 We first create the vector stmt VS1_0, and, as usual, record a
7988 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7989 Next, we create the vector stmt VS1_1, and record a pointer to
7990 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7991 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7992 stmts and pointers:
7993 RELATED_STMT VEC_STMT
7994 VS1_0: vx0 = memref0 VS1_1 -
7995 VS1_1: vx1 = memref1 VS1_2 -
7996 VS1_2: vx2 = memref2 VS1_3 -
7997 VS1_3: vx3 = memref3 - -
7998 S1: x = load - VS1_0
7999 S2: z = x + 1 - -
8000
b8698a0f
L
 8001 See the documentation of vect_get_vec_def_for_stmt_copy for how the
 8002 information we recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
8003 stmt S2. */
8004
0d0293ac 8005 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
8006
8007 S1: x2 = &base + 2
8008 S2: x0 = &base
8009 S3: x1 = &base + 1
8010 S4: x3 = &base + 3
8011
b8698a0f 8012 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
8013 starting from the access of the first stmt of the chain:
8014
8015 VS1: vx0 = &base
8016 VS2: vx1 = &base + vec_size*1
8017 VS3: vx3 = &base + vec_size*2
8018 VS4: vx4 = &base + vec_size*3
8019
8020 Then permutation statements are generated:
8021
e2c83630
RH
8022 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8023 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
8024 ...
8025
8026 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8027 (the order of the data-refs in the output of vect_permute_load_chain
8028 corresponds to the order of scalar stmts in the interleaving chain - see
8029 the documentation of vect_permute_load_chain()).
8030 The generation of permutation stmts and recording them in
0d0293ac 8031 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8032
b8698a0f 8033 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8034 permutation stmts above are created for every copy. The result vector
8035 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8036 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
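   /* As a concrete (hypothetical) instance of the permutations above: for
      V4SI vectors and an interleaving chain of two scalars, the selectors
      are { 0, 2, 4, 6 } for the even elements and { 1, 3, 5, 7 } for the
      odd ones, i.e.
	VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >  */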
ebfd146a
IR
8037
8038 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8039 on a target that supports unaligned accesses (dr_unaligned_supported)
8040 we generate the following code:
8041 p = initial_addr;
8042 indx = 0;
8043 loop {
8044 p = p + indx * vectype_size;
8045 vec_dest = *(p);
8046 indx = indx + 1;
8047 }
8048
8049 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8050 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8051 then generate the following code, in which the data in each iteration is
8052 obtained by two vector loads, one from the previous iteration, and one
8053 from the current iteration:
8054 p1 = initial_addr;
8055 msq_init = *(floor(p1))
8056 p2 = initial_addr + VS - 1;
8057 realignment_token = call target_builtin;
8058 indx = 0;
8059 loop {
8060 p2 = p2 + indx * vectype_size
8061 lsq = *(floor(p2))
8062 vec_dest = realign_load (msq, lsq, realignment_token)
8063 indx = indx + 1;
8064 msq = lsq;
8065 } */
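   /* A (hypothetical) illustration of the realignment scheme above: with
      16-byte vectors and an address misaligned by 4 bytes, floor(p1) and
      floor(p2) load the two aligned 16-byte chunks that straddle the
      desired data, and realign_load shifts the pair so that the 16 bytes
      starting at the unaligned address are produced; only one new aligned
      load is needed per iteration because msq is carried over from the
      previous one.  */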
8066
8067 /* If the misalignment remains the same throughout the execution of the
8068 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8069 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8070 This can only occur when vectorizing memory accesses in the inner-loop
8071 nested within an outer-loop that is being vectorized. */
8072
d1e4b493 8073 if (nested_in_vect_loop
cf098191
RS
8074 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8075 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8076 {
8077 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8078 compute_in_loop = true;
8079 }
8080
8081 if ((alignment_support_scheme == dr_explicit_realign_optimized
8082 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8083 && !compute_in_loop)
ebfd146a
IR
8084 {
8085 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8086 alignment_support_scheme, NULL_TREE,
8087 &at_loop);
8088 if (alignment_support_scheme == dr_explicit_realign_optimized)
8089 {
538dd0b7 8090 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8091 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8092 size_one_node);
ebfd146a
IR
8093 }
8094 }
8095 else
8096 at_loop = loop;
8097
62da9e14 8098 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8099 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8100
ab2fc782
RS
8101 tree bump;
8102 tree vec_offset = NULL_TREE;
8103 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8104 {
8105 aggr_type = NULL_TREE;
8106 bump = NULL_TREE;
8107 }
8108 else if (memory_access_type == VMAT_GATHER_SCATTER)
8109 {
8110 aggr_type = elem_type;
8111 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8112 &bump, &vec_offset);
8113 }
272c6793 8114 else
ab2fc782
RS
8115 {
8116 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8117 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8118 else
8119 aggr_type = vectype;
8120 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8121 }
272c6793 8122
c3a8f964 8123 tree vec_mask = NULL_TREE;
ebfd146a 8124 prev_stmt_info = NULL;
4d694b27 8125 poly_uint64 group_elt = 0;
ebfd146a 8126 for (j = 0; j < ncopies; j++)
b8698a0f 8127 {
272c6793 8128 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8129 if (j == 0)
74bf76ed
JJ
8130 {
8131 bool simd_lane_access_p
8132 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8133 if (simd_lane_access_p
8134 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8135 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8136 && integer_zerop (DR_OFFSET (first_dr))
8137 && integer_zerop (DR_INIT (first_dr))
8138 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8139 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8140 && (alignment_support_scheme == dr_aligned
8141 || alignment_support_scheme == dr_unaligned_supported))
8142 {
8143 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8144 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8145 inv_p = false;
74bf76ed 8146 }
4f0a0218
RB
8147 else if (first_stmt_for_drptr
8148 && first_stmt != first_stmt_for_drptr)
8149 {
8150 dataref_ptr
8151 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8152 at_loop, offset, &dummy, gsi,
8153 &ptr_incr, simd_lane_access_p,
ab2fc782 8154 &inv_p, byte_offset, bump);
4f0a0218
RB
8155 /* Adjust the pointer by the difference to first_stmt. */
8156 data_reference_p ptrdr
8157 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8158 tree diff = fold_convert (sizetype,
8159 size_binop (MINUS_EXPR,
8160 DR_INIT (first_dr),
8161 DR_INIT (ptrdr)));
8162 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8163 stmt, diff);
8164 }
bfaa08b7
RS
8165 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8166 {
8167 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8168 &dataref_ptr, &vec_offset);
8169 inv_p = false;
8170 }
74bf76ed
JJ
8171 else
8172 dataref_ptr
8173 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8174 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8175 simd_lane_access_p, &inv_p,
ab2fc782 8176 byte_offset, bump);
c3a8f964
RS
8177 if (mask)
8178 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8179 mask_vectype);
74bf76ed 8180 }
ebfd146a 8181 else
c3a8f964
RS
8182 {
8183 if (dataref_offset)
8184 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8185 bump);
bfaa08b7 8186 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8187 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8188 vec_offset);
c3a8f964 8189 else
ab2fc782
RS
8190 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8191 stmt, bump);
c3a8f964 8192 if (mask)
929b4411 8193 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8194 }
ebfd146a 8195
0d0293ac 8196 if (grouped_load || slp_perm)
9771b263 8197 dr_chain.create (vec_num);
5ce1ee7f 8198
2de001ee 8199 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8200 {
272c6793
RS
8201 tree vec_array;
8202
8203 vec_array = create_vector_array (vectype, vec_num);
8204
7cfb4d93 8205 tree final_mask = NULL_TREE;
70088b95
RS
8206 if (loop_masks)
8207 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8208 vectype, j);
7cfb4d93
RS
8209 if (vec_mask)
8210 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8211 vec_mask, gsi);
8212
7e11fc7f 8213 gcall *call;
7cfb4d93 8214 if (final_mask)
7e11fc7f
RS
8215 {
8216 /* Emit:
8217 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8218 VEC_MASK). */
8219 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8220 tree alias_ptr = build_int_cst (ref_type, align);
8221 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8222 dataref_ptr, alias_ptr,
7cfb4d93 8223 final_mask);
7e11fc7f
RS
8224 }
8225 else
8226 {
8227 /* Emit:
8228 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8229 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8230 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8231 }
a844293d
RS
8232 gimple_call_set_lhs (call, vec_array);
8233 gimple_call_set_nothrow (call, true);
8234 new_stmt = call;
272c6793 8235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8236
272c6793
RS
8237 /* Extract each vector into an SSA_NAME. */
8238 for (i = 0; i < vec_num; i++)
ebfd146a 8239 {
272c6793
RS
8240 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8241 vec_array, i);
9771b263 8242 dr_chain.quick_push (new_temp);
272c6793
RS
8243 }
8244
8245 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8246 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8247
8248 /* Record that VEC_ARRAY is now dead. */
8249 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8250 }
8251 else
8252 {
8253 for (i = 0; i < vec_num; i++)
8254 {
7cfb4d93 8255 tree final_mask = NULL_TREE;
70088b95 8256 if (loop_masks
7cfb4d93 8257 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8258 final_mask = vect_get_loop_mask (gsi, loop_masks,
8259 vec_num * ncopies,
7cfb4d93
RS
8260 vectype, vec_num * j + i);
8261 if (vec_mask)
8262 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8263 vec_mask, gsi);
8264
272c6793
RS
8265 if (i > 0)
8266 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8267 stmt, bump);
272c6793
RS
8268
8269 /* 2. Create the vector-load in the loop. */
8270 switch (alignment_support_scheme)
8271 {
8272 case dr_aligned:
8273 case dr_unaligned_supported:
be1ac4ec 8274 {
644ffefd
MJ
8275 unsigned int align, misalign;
8276
bfaa08b7
RS
8277 if (memory_access_type == VMAT_GATHER_SCATTER)
8278 {
8279 tree scale = size_int (gs_info.scale);
8280 gcall *call;
70088b95 8281 if (loop_masks)
bfaa08b7
RS
8282 call = gimple_build_call_internal
8283 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8284 vec_offset, scale, final_mask);
8285 else
8286 call = gimple_build_call_internal
8287 (IFN_GATHER_LOAD, 3, dataref_ptr,
8288 vec_offset, scale);
8289 gimple_call_set_nothrow (call, true);
8290 new_stmt = call;
8291 data_ref = NULL_TREE;
8292 break;
8293 }
8294
f702e7d4 8295 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8296 if (alignment_support_scheme == dr_aligned)
8297 {
8298 gcc_assert (aligned_access_p (first_dr));
644ffefd 8299 misalign = 0;
272c6793
RS
8300 }
8301 else if (DR_MISALIGNMENT (first_dr) == -1)
8302 {
25f68d90 8303 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8304 misalign = 0;
272c6793
RS
8305 }
8306 else
c3a8f964 8307 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8308 if (dataref_offset == NULL_TREE
8309 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8310 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8311 align, misalign);
c3a8f964 8312
7cfb4d93 8313 if (final_mask)
c3a8f964
RS
8314 {
8315 align = least_bit_hwi (misalign | align);
8316 tree ptr = build_int_cst (ref_type, align);
8317 gcall *call
8318 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8319 dataref_ptr, ptr,
7cfb4d93 8320 final_mask);
c3a8f964
RS
8321 gimple_call_set_nothrow (call, true);
8322 new_stmt = call;
8323 data_ref = NULL_TREE;
8324 }
8325 else
8326 {
8327 data_ref
8328 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8329 dataref_offset
8330 ? dataref_offset
8331 : build_int_cst (ref_type, 0));
8332 if (alignment_support_scheme == dr_aligned)
8333 ;
8334 else if (DR_MISALIGNMENT (first_dr) == -1)
8335 TREE_TYPE (data_ref)
8336 = build_aligned_type (TREE_TYPE (data_ref),
8337 align * BITS_PER_UNIT);
8338 else
8339 TREE_TYPE (data_ref)
8340 = build_aligned_type (TREE_TYPE (data_ref),
8341 TYPE_ALIGN (elem_type));
8342 }
272c6793 8343 break;
be1ac4ec 8344 }
272c6793 8345 case dr_explicit_realign:
267d3070 8346 {
272c6793 8347 tree ptr, bump;
272c6793 8348
d88981fc 8349 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8350
8351 if (compute_in_loop)
8352 msq = vect_setup_realignment (first_stmt, gsi,
8353 &realignment_token,
8354 dr_explicit_realign,
8355 dataref_ptr, NULL);
8356
aed93b23
RB
8357 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8358 ptr = copy_ssa_name (dataref_ptr);
8359 else
8360 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8361 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8362 new_stmt = gimple_build_assign
8363 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8364 build_int_cst
8365 (TREE_TYPE (dataref_ptr),
f702e7d4 8366 -(HOST_WIDE_INT) align));
272c6793
RS
8367 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8368 data_ref
8369 = build2 (MEM_REF, vectype, ptr,
44fc7854 8370 build_int_cst (ref_type, 0));
19986382 8371 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8372 vec_dest = vect_create_destination_var (scalar_dest,
8373 vectype);
8374 new_stmt = gimple_build_assign (vec_dest, data_ref);
8375 new_temp = make_ssa_name (vec_dest, new_stmt);
8376 gimple_assign_set_lhs (new_stmt, new_temp);
8377 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8378 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8380 msq = new_temp;
8381
d88981fc 8382 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8383 TYPE_SIZE_UNIT (elem_type));
d88981fc 8384 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8385 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8386 new_stmt = gimple_build_assign
8387 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8388 build_int_cst
f702e7d4 8389 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8390 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8391 gimple_assign_set_lhs (new_stmt, ptr);
8392 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8393 data_ref
8394 = build2 (MEM_REF, vectype, ptr,
44fc7854 8395 build_int_cst (ref_type, 0));
272c6793 8396 break;
267d3070 8397 }
272c6793 8398 case dr_explicit_realign_optimized:
f702e7d4
RS
8399 {
8400 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8401 new_temp = copy_ssa_name (dataref_ptr);
8402 else
8403 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8404 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8405 new_stmt = gimple_build_assign
8406 (new_temp, BIT_AND_EXPR, dataref_ptr,
8407 build_int_cst (TREE_TYPE (dataref_ptr),
8408 -(HOST_WIDE_INT) align));
8409 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8410 data_ref
8411 = build2 (MEM_REF, vectype, new_temp,
8412 build_int_cst (ref_type, 0));
8413 break;
8414 }
272c6793
RS
8415 default:
8416 gcc_unreachable ();
8417 }
ebfd146a 8418 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8419 /* DATA_REF is null if we've already built the statement. */
8420 if (data_ref)
19986382
RB
8421 {
8422 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8423 new_stmt = gimple_build_assign (vec_dest, data_ref);
8424 }
ebfd146a 8425 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8426 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8427 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8428
272c6793
RS
8429 /* 3. Handle explicit realignment if necessary/supported.
8430 Create in loop:
8431 vec_dest = realign_load (msq, lsq, realignment_token) */
8432 if (alignment_support_scheme == dr_explicit_realign_optimized
8433 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8434 {
272c6793
RS
8435 lsq = gimple_assign_lhs (new_stmt);
8436 if (!realignment_token)
8437 realignment_token = dataref_ptr;
8438 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8439 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8440 msq, lsq, realignment_token);
272c6793
RS
8441 new_temp = make_ssa_name (vec_dest, new_stmt);
8442 gimple_assign_set_lhs (new_stmt, new_temp);
8443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8444
8445 if (alignment_support_scheme == dr_explicit_realign_optimized)
8446 {
8447 gcc_assert (phi);
8448 if (i == vec_num - 1 && j == ncopies - 1)
8449 add_phi_arg (phi, lsq,
8450 loop_latch_edge (containing_loop),
9e227d60 8451 UNKNOWN_LOCATION);
272c6793
RS
8452 msq = lsq;
8453 }
ebfd146a 8454 }
ebfd146a 8455
59fd17e3
RB
8456 /* 4. Handle invariant-load. */
8457 if (inv_p && !bb_vinfo)
8458 {
59fd17e3 8459 gcc_assert (!grouped_load);
d1417442
JJ
8460 /* If we have versioned for aliasing or the loop doesn't
8461 have any data dependencies that would preclude this,
8462 then we are sure this is a loop invariant load and
8463 thus we can insert it on the preheader edge. */
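	      /* For instance (purely illustrative): for a load "x = *q"
		 whose address and value are loop invariant, the scalar
		 load is emitted on the preheader edge and its result is
		 then splatted into the vector used inside the loop.  */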
8464 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8465 && !nested_in_vect_loop
6b916b36 8466 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8467 {
8468 if (dump_enabled_p ())
8469 {
8470 dump_printf_loc (MSG_NOTE, vect_location,
8471 "hoisting out of the vectorized "
8472 "loop: ");
8473 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8474 }
b731b390 8475 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8476 gsi_insert_on_edge_immediate
8477 (loop_preheader_edge (loop),
8478 gimple_build_assign (tem,
8479 unshare_expr
8480 (gimple_assign_rhs1 (stmt))));
8481 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8482 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8483 set_vinfo_for_stmt (new_stmt,
8484 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8485 }
8486 else
8487 {
8488 gimple_stmt_iterator gsi2 = *gsi;
8489 gsi_next (&gsi2);
8490 new_temp = vect_init_vector (stmt, scalar_dest,
8491 vectype, &gsi2);
34cd48e5 8492 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8493 }
59fd17e3
RB
8494 }
8495
62da9e14 8496 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8497 {
aec7ae7d
JJ
8498 tree perm_mask = perm_mask_for_reverse (vectype);
8499 new_temp = permute_vec_elements (new_temp, new_temp,
8500 perm_mask, stmt, gsi);
ebfd146a
IR
8501 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8502 }
267d3070 8503
272c6793 8504 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8505 vect_transform_grouped_load (). */
8506 if (grouped_load || slp_perm)
9771b263 8507 dr_chain.quick_push (new_temp);
267d3070 8508
272c6793
RS
8509 /* Store vector loads in the corresponding SLP_NODE. */
8510 if (slp && !slp_perm)
9771b263 8511 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8512
 8513 /* With SLP permutation we load the gaps as well; without it
8514 we need to skip the gaps after we manage to fully load
2c53b149 8515 all elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8516 group_elt += nunits;
d9f21f6a
RS
8517 if (maybe_ne (group_gap_adj, 0U)
8518 && !slp_perm
8519 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8520 {
d9f21f6a
RS
8521 poly_wide_int bump_val
8522 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8523 * group_gap_adj);
8e6cdc90 8524 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8525 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8526 stmt, bump);
8527 group_elt = 0;
8528 }
272c6793 8529 }
9b999e8c
RB
8530 /* Bump the vector pointer to account for a gap or for excess
8531 elements loaded for a permuted SLP load. */
d9f21f6a 8532 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8533 {
d9f21f6a
RS
8534 poly_wide_int bump_val
8535 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8536 * group_gap_adj);
8e6cdc90 8537 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8538 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8539 stmt, bump);
8540 }
ebfd146a
IR
8541 }
8542
8543 if (slp && !slp_perm)
8544 continue;
8545
8546 if (slp_perm)
8547 {
29afecdf 8548 unsigned n_perms;
01d8bf07 8549 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8550 slp_node_instance, false,
8551 &n_perms))
ebfd146a 8552 {
9771b263 8553 dr_chain.release ();
ebfd146a
IR
8554 return false;
8555 }
8556 }
8557 else
8558 {
0d0293ac 8559 if (grouped_load)
ebfd146a 8560 {
2de001ee 8561 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8562 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8563 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8564 }
8565 else
8566 {
8567 if (j == 0)
8568 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8569 else
8570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8571 prev_stmt_info = vinfo_for_stmt (new_stmt);
8572 }
8573 }
9771b263 8574 dr_chain.release ();
ebfd146a
IR
8575 }
8576
ebfd146a
IR
8577 return true;
8578}
8579
8580/* Function vect_is_simple_cond.
b8698a0f 8581
ebfd146a
IR
8582 Input:
8583 LOOP - the loop that is being vectorized.
8584 COND - Condition that is checked for simple use.
8585
e9e1d143
RG
8586 Output:
8587 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8588 *DTS - The def types for the arguments of the comparison
e9e1d143 8589
ebfd146a
IR
8590 Returns whether a COND can be vectorized. Checks whether
 8591 condition operands are supportable using vect_is_simple_use. */
8592
87aab9b2 8593static bool
4fc5ebf1 8594vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8595 tree *comp_vectype, enum vect_def_type *dts,
8596 tree vectype)
ebfd146a
IR
8597{
8598 tree lhs, rhs;
e9e1d143 8599 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8600
a414c77f
IE
8601 /* Mask case. */
8602 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8603 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
8604 {
8605 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8606 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 8607 &dts[0], comp_vectype)
a414c77f
IE
8608 || !*comp_vectype
8609 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8610 return false;
8611 return true;
8612 }
8613
ebfd146a
IR
8614 if (!COMPARISON_CLASS_P (cond))
8615 return false;
8616
8617 lhs = TREE_OPERAND (cond, 0);
8618 rhs = TREE_OPERAND (cond, 1);
8619
8620 if (TREE_CODE (lhs) == SSA_NAME)
8621 {
355fe088 8622 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 8623 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
8624 return false;
8625 }
4fc5ebf1
JG
8626 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8627 || TREE_CODE (lhs) == FIXED_CST)
8628 dts[0] = vect_constant_def;
8629 else
ebfd146a
IR
8630 return false;
8631
8632 if (TREE_CODE (rhs) == SSA_NAME)
8633 {
355fe088 8634 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 8635 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
8636 return false;
8637 }
4fc5ebf1
JG
8638 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8639 || TREE_CODE (rhs) == FIXED_CST)
8640 dts[1] = vect_constant_def;
8641 else
ebfd146a
IR
8642 return false;
8643
28b33016 8644 if (vectype1 && vectype2
928686b1
RS
8645 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8646 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8647 return false;
8648
e9e1d143 8649 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8650 /* Invariant comparison. */
4515e413 8651 if (! *comp_vectype && vectype)
8da4c8d8
RB
8652 {
8653 tree scalar_type = TREE_TYPE (lhs);
8654 /* If we can widen the comparison to match vectype do so. */
8655 if (INTEGRAL_TYPE_P (scalar_type)
8656 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8657 TYPE_SIZE (TREE_TYPE (vectype))))
8658 scalar_type = build_nonstandard_integer_type
8659 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8660 TYPE_UNSIGNED (scalar_type));
8661 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8662 }
8663
ebfd146a
IR
8664 return true;
8665}
8666
8667/* vectorizable_condition.
8668
b8698a0f
L
 8669 Check if STMT is a conditional modify expression that can be vectorized.
8670 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8671 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8672 at GSI.
8673
8674 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8675 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 8676 else clause if it is 2).
ebfd146a
IR
8677
8678 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8679
4bbe8262 8680bool
355fe088
TS
8681vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8682 gimple **vec_stmt, tree reduc_def, int reduc_index,
68435eb2 8683 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8684{
8685 tree scalar_dest = NULL_TREE;
8686 tree vec_dest = NULL_TREE;
01216d27
JJ
8687 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8688 tree then_clause, else_clause;
ebfd146a 8689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8690 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8691 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8692 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8693 tree vec_compare;
ebfd146a
IR
8694 tree new_temp;
8695 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8696 enum vect_def_type dts[4]
8697 = {vect_unknown_def_type, vect_unknown_def_type,
8698 vect_unknown_def_type, vect_unknown_def_type};
8699 int ndts = 4;
f7e531cf 8700 int ncopies;
01216d27 8701 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8702 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8703 int i, j;
8704 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8705 vec<tree> vec_oprnds0 = vNULL;
8706 vec<tree> vec_oprnds1 = vNULL;
8707 vec<tree> vec_oprnds2 = vNULL;
8708 vec<tree> vec_oprnds3 = vNULL;
74946978 8709 tree vec_cmp_type;
a414c77f 8710 bool masked = false;
b8698a0f 8711
f7e531cf
IR
8712 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8713 return false;
8714
bb6c2b68
RS
8715 vect_reduction_type reduction_type
8716 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8717 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8718 {
8719 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8720 return false;
ebfd146a 8721
af29617a
AH
8722 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8723 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8724 && reduc_def))
8725 return false;
ebfd146a 8726
af29617a
AH
8727 /* FORNOW: not yet supported. */
8728 if (STMT_VINFO_LIVE_P (stmt_info))
8729 {
8730 if (dump_enabled_p ())
8731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8732 "value used after loop.\n");
8733 return false;
8734 }
ebfd146a
IR
8735 }
8736
8737 /* Is vectorizable conditional operation? */
8738 if (!is_gimple_assign (stmt))
8739 return false;
8740
8741 code = gimple_assign_rhs_code (stmt);
8742
8743 if (code != COND_EXPR)
8744 return false;
8745
465c8c19 8746 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8747 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8748
fce57248 8749 if (slp_node)
465c8c19
JJ
8750 ncopies = 1;
8751 else
e8f142e2 8752 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8753
8754 gcc_assert (ncopies >= 1);
8755 if (reduc_index && ncopies > 1)
8756 return false; /* FORNOW */
8757
4e71066d
RG
8758 cond_expr = gimple_assign_rhs1 (stmt);
8759 then_clause = gimple_assign_rhs2 (stmt);
8760 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8761
4fc5ebf1 8762 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8763 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8764 || !comp_vectype)
ebfd146a
IR
8765 return false;
8766
81c40241 8767 gimple *def_stmt;
4fc5ebf1 8768 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8769 &vectype1))
8770 return false;
4fc5ebf1 8771 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8772 &vectype2))
ebfd146a 8773 return false;
2947d3b2
IE
8774
8775 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8776 return false;
8777
8778 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8779 return false;
8780
28b33016
IE
8781 masked = !COMPARISON_CLASS_P (cond_expr);
8782 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8783
74946978
MP
8784 if (vec_cmp_type == NULL_TREE)
8785 return false;
784fb9b3 8786
01216d27
JJ
8787 cond_code = TREE_CODE (cond_expr);
8788 if (!masked)
8789 {
8790 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8791 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8792 }
8793
8794 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8795 {
8796 /* Boolean values may have another representation in vectors
8797 and therefore we prefer bit operations over comparison for
8798 them (which also works for scalar masks). We store opcodes
8799 to use in bitop1 and bitop2. Statement is vectorized as
8800 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8801 depending on bitop1 and bitop2 arity. */
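      /* For instance, with mask operands a GT_EXPR condition a > b is
	 emitted as a & ~b (bitop1 = BIT_NOT_EXPR applied to the second
	 operand, bitop2 = BIT_AND_EXPR), while an LE_EXPR condition
	 a <= b becomes b | ~a after the operands are swapped.  */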
8802 switch (cond_code)
8803 {
8804 case GT_EXPR:
8805 bitop1 = BIT_NOT_EXPR;
8806 bitop2 = BIT_AND_EXPR;
8807 break;
8808 case GE_EXPR:
8809 bitop1 = BIT_NOT_EXPR;
8810 bitop2 = BIT_IOR_EXPR;
8811 break;
8812 case LT_EXPR:
8813 bitop1 = BIT_NOT_EXPR;
8814 bitop2 = BIT_AND_EXPR;
8815 std::swap (cond_expr0, cond_expr1);
8816 break;
8817 case LE_EXPR:
8818 bitop1 = BIT_NOT_EXPR;
8819 bitop2 = BIT_IOR_EXPR;
8820 std::swap (cond_expr0, cond_expr1);
8821 break;
8822 case NE_EXPR:
8823 bitop1 = BIT_XOR_EXPR;
8824 break;
8825 case EQ_EXPR:
8826 bitop1 = BIT_XOR_EXPR;
8827 bitop2 = BIT_NOT_EXPR;
8828 break;
8829 default:
8830 return false;
8831 }
8832 cond_code = SSA_NAME;
8833 }
8834
b8698a0f 8835 if (!vec_stmt)
ebfd146a 8836 {
01216d27
JJ
8837 if (bitop1 != NOP_EXPR)
8838 {
8839 machine_mode mode = TYPE_MODE (comp_vectype);
8840 optab optab;
8841
8842 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8843 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8844 return false;
8845
8846 if (bitop2 != NOP_EXPR)
8847 {
8848 optab = optab_for_tree_code (bitop2, comp_vectype,
8849 optab_default);
8850 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8851 return false;
8852 }
8853 }
4fc5ebf1
JG
8854 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8855 cond_code))
8856 {
68435eb2
RB
8857 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8858 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8859 cost_vec);
4fc5ebf1
JG
8860 return true;
8861 }
8862 return false;
ebfd146a
IR
8863 }
8864
f7e531cf
IR
8865 /* Transform. */
8866
8867 if (!slp_node)
8868 {
9771b263
DN
8869 vec_oprnds0.create (1);
8870 vec_oprnds1.create (1);
8871 vec_oprnds2.create (1);
8872 vec_oprnds3.create (1);
f7e531cf 8873 }
ebfd146a
IR
8874
8875 /* Handle def. */
8876 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8877 if (reduction_type != EXTRACT_LAST_REDUCTION)
8878 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8879
8880 /* Handle cond expr. */
a855b1b1
MM
8881 for (j = 0; j < ncopies; j++)
8882 {
bb6c2b68 8883 gimple *new_stmt = NULL;
a855b1b1
MM
8884 if (j == 0)
8885 {
f7e531cf
IR
8886 if (slp_node)
8887 {
00f96dc9
TS
8888 auto_vec<tree, 4> ops;
8889 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8890
a414c77f 8891 if (masked)
01216d27 8892 ops.safe_push (cond_expr);
a414c77f
IE
8893 else
8894 {
01216d27
JJ
8895 ops.safe_push (cond_expr0);
8896 ops.safe_push (cond_expr1);
a414c77f 8897 }
9771b263
DN
8898 ops.safe_push (then_clause);
8899 ops.safe_push (else_clause);
306b0c92 8900 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8901 vec_oprnds3 = vec_defs.pop ();
8902 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8903 if (!masked)
8904 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8905 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8906 }
8907 else
8908 {
355fe088 8909 gimple *gtemp;
a414c77f
IE
8910 if (masked)
8911 {
8912 vec_cond_lhs
8913 = vect_get_vec_def_for_operand (cond_expr, stmt,
8914 comp_vectype);
8915 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8916 &gtemp, &dts[0]);
8917 }
8918 else
8919 {
01216d27
JJ
8920 vec_cond_lhs
8921 = vect_get_vec_def_for_operand (cond_expr0,
8922 stmt, comp_vectype);
8923 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8924
8925 vec_cond_rhs
8926 = vect_get_vec_def_for_operand (cond_expr1,
8927 stmt, comp_vectype);
8928 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8929 }
f7e531cf
IR
8930 if (reduc_index == 1)
8931 vec_then_clause = reduc_def;
8932 else
8933 {
8934 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8935 stmt);
8936 vect_is_simple_use (then_clause, loop_vinfo,
8937 &gtemp, &dts[2]);
f7e531cf
IR
8938 }
8939 if (reduc_index == 2)
8940 vec_else_clause = reduc_def;
8941 else
8942 {
8943 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8944 stmt);
8945 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8946 }
a855b1b1
MM
8947 }
8948 }
8949 else
8950 {
a414c77f
IE
8951 vec_cond_lhs
8952 = vect_get_vec_def_for_stmt_copy (dts[0],
8953 vec_oprnds0.pop ());
8954 if (!masked)
8955 vec_cond_rhs
8956 = vect_get_vec_def_for_stmt_copy (dts[1],
8957 vec_oprnds1.pop ());
8958
a855b1b1 8959 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8960 vec_oprnds2.pop ());
a855b1b1 8961 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8962 vec_oprnds3.pop ());
f7e531cf
IR
8963 }
8964
8965 if (!slp_node)
8966 {
9771b263 8967 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8968 if (!masked)
8969 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8970 vec_oprnds2.quick_push (vec_then_clause);
8971 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8972 }
8973
9dc3f7de 8974 /* Arguments are ready. Create the new vector stmt. */
9771b263 8975 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8976 {
9771b263
DN
8977 vec_then_clause = vec_oprnds2[i];
8978 vec_else_clause = vec_oprnds3[i];
a855b1b1 8979
a414c77f
IE
8980 if (masked)
8981 vec_compare = vec_cond_lhs;
8982 else
8983 {
8984 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8985 if (bitop1 == NOP_EXPR)
8986 vec_compare = build2 (cond_code, vec_cmp_type,
8987 vec_cond_lhs, vec_cond_rhs);
8988 else
8989 {
8990 new_temp = make_ssa_name (vec_cmp_type);
8991 if (bitop1 == BIT_NOT_EXPR)
8992 new_stmt = gimple_build_assign (new_temp, bitop1,
8993 vec_cond_rhs);
8994 else
8995 new_stmt
8996 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8997 vec_cond_rhs);
8998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8999 if (bitop2 == NOP_EXPR)
9000 vec_compare = new_temp;
9001 else if (bitop2 == BIT_NOT_EXPR)
9002 {
9003 /* Instead of doing ~x ? y : z do x ? z : y. */
9004 vec_compare = new_temp;
9005 std::swap (vec_then_clause, vec_else_clause);
9006 }
9007 else
9008 {
9009 vec_compare = make_ssa_name (vec_cmp_type);
9010 new_stmt
9011 = gimple_build_assign (vec_compare, bitop2,
9012 vec_cond_lhs, new_temp);
9013 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9014 }
9015 }
a414c77f 9016 }
bb6c2b68
RS
9017 if (reduction_type == EXTRACT_LAST_REDUCTION)
9018 {
9019 if (!is_gimple_val (vec_compare))
9020 {
9021 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9022 new_stmt = gimple_build_assign (vec_compare_name,
9023 vec_compare);
9024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9025 vec_compare = vec_compare_name;
9026 }
9027 gcc_assert (reduc_index == 2);
9028 new_stmt = gimple_build_call_internal
9029 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9030 vec_then_clause);
9031 gimple_call_set_lhs (new_stmt, scalar_dest);
9032 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9033 if (stmt == gsi_stmt (*gsi))
9034 vect_finish_replace_stmt (stmt, new_stmt);
9035 else
9036 {
9037 /* In this case we're moving the definition to later in the
9038 block. That doesn't matter because the only uses of the
9039 lhs are in phi statements. */
9040 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9041 gsi_remove (&old_gsi, true);
9042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9043 }
9044 }
9045 else
9046 {
9047 new_temp = make_ssa_name (vec_dest);
9048 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9049 vec_compare, vec_then_clause,
9050 vec_else_clause);
9051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9052 }
f7e531cf 9053 if (slp_node)
9771b263 9054 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
9055 }
9056
9057 if (slp_node)
9058 continue;
9059
9060 if (j == 0)
9061 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9062 else
9063 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9064
9065 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 9066 }
b8698a0f 9067
9771b263
DN
9068 vec_oprnds0.release ();
9069 vec_oprnds1.release ();
9070 vec_oprnds2.release ();
9071 vec_oprnds3.release ();
f7e531cf 9072
ebfd146a
IR
9073 return true;
9074}
9075
42fd8198
IE
9076/* vectorizable_comparison.
9077
 9078 Check if STMT is a comparison expression that can be vectorized.
9079 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9080 comparison, put it in VEC_STMT, and insert it at GSI.
9081
9082 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9083
fce57248 9084static bool
42fd8198
IE
9085vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9086 gimple **vec_stmt, tree reduc_def,
68435eb2 9087 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9088{
9089 tree lhs, rhs1, rhs2;
9090 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9091 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9092 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9093 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9094 tree new_temp;
9095 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9096 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9097 int ndts = 2;
928686b1 9098 poly_uint64 nunits;
42fd8198 9099 int ncopies;
49e76ff1 9100 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9101 stmt_vec_info prev_stmt_info = NULL;
9102 int i, j;
9103 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9104 vec<tree> vec_oprnds0 = vNULL;
9105 vec<tree> vec_oprnds1 = vNULL;
9106 gimple *def_stmt;
9107 tree mask_type;
9108 tree mask;
9109
c245362b
IE
9110 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9111 return false;
9112
30480bcd 9113 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9114 return false;
9115
9116 mask_type = vectype;
9117 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9118
fce57248 9119 if (slp_node)
42fd8198
IE
9120 ncopies = 1;
9121 else
e8f142e2 9122 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9123
9124 gcc_assert (ncopies >= 1);
42fd8198
IE
9125 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9126 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9127 && reduc_def))
9128 return false;
9129
9130 if (STMT_VINFO_LIVE_P (stmt_info))
9131 {
9132 if (dump_enabled_p ())
9133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9134 "value used after loop.\n");
9135 return false;
9136 }
9137
9138 if (!is_gimple_assign (stmt))
9139 return false;
9140
9141 code = gimple_assign_rhs_code (stmt);
9142
9143 if (TREE_CODE_CLASS (code) != tcc_comparison)
9144 return false;
9145
9146 rhs1 = gimple_assign_rhs1 (stmt);
9147 rhs2 = gimple_assign_rhs2 (stmt);
9148
9149 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9150 &dts[0], &vectype1))
9151 return false;
9152
9153 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9154 &dts[1], &vectype2))
9155 return false;
9156
9157 if (vectype1 && vectype2
928686b1
RS
9158 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9159 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9160 return false;
9161
9162 vectype = vectype1 ? vectype1 : vectype2;
9163
9164 /* Invariant comparison. */
9165 if (!vectype)
9166 {
69a9a66f 9167 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9168 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9169 return false;
9170 }
928686b1 9171 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9172 return false;
9173
49e76ff1
IE
9174 /* Can't compare mask and non-mask types. */
9175 if (vectype1 && vectype2
9176 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9177 return false;
9178
9179 /* Boolean values may have another representation in vectors
9180 and therefore we prefer bit operations over comparison for
9181 them (which also works for scalar masks). We store opcodes
9182 to use in bitop1 and bitop2. Statement is vectorized as
9183 BITOP2 (rhs1 BITOP1 rhs2) or
9184 rhs1 BITOP2 (BITOP1 rhs2)
9185 depending on bitop1 and bitop2 arity. */
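   /* For instance, on boolean vectors an NE_EXPR mask is computed as
      rhs1 ^ rhs2 (bitop1 = BIT_XOR_EXPR only), and an EQ_EXPR mask as
      ~(rhs1 ^ rhs2) (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */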
9186 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9187 {
9188 if (code == GT_EXPR)
9189 {
9190 bitop1 = BIT_NOT_EXPR;
9191 bitop2 = BIT_AND_EXPR;
9192 }
9193 else if (code == GE_EXPR)
9194 {
9195 bitop1 = BIT_NOT_EXPR;
9196 bitop2 = BIT_IOR_EXPR;
9197 }
9198 else if (code == LT_EXPR)
9199 {
9200 bitop1 = BIT_NOT_EXPR;
9201 bitop2 = BIT_AND_EXPR;
9202 std::swap (rhs1, rhs2);
264d951a 9203 std::swap (dts[0], dts[1]);
49e76ff1
IE
9204 }
9205 else if (code == LE_EXPR)
9206 {
9207 bitop1 = BIT_NOT_EXPR;
9208 bitop2 = BIT_IOR_EXPR;
9209 std::swap (rhs1, rhs2);
264d951a 9210 std::swap (dts[0], dts[1]);
49e76ff1
IE
9211 }
9212 else
9213 {
9214 bitop1 = BIT_XOR_EXPR;
9215 if (code == EQ_EXPR)
9216 bitop2 = BIT_NOT_EXPR;
9217 }
9218 }
9219
42fd8198
IE
9220 if (!vec_stmt)
9221 {
49e76ff1 9222 if (bitop1 == NOP_EXPR)
68435eb2
RB
9223 {
9224 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9225 return false;
9226 }
49e76ff1
IE
9227 else
9228 {
9229 machine_mode mode = TYPE_MODE (vectype);
9230 optab optab;
9231
9232 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9233 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9234 return false;
9235
9236 if (bitop2 != NOP_EXPR)
9237 {
9238 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9239 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9240 return false;
9241 }
49e76ff1 9242 }
68435eb2
RB
9243
9244 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9245 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9246 dts, ndts, slp_node, cost_vec);
9247 return true;
42fd8198
IE
9248 }
9249
9250 /* Transform. */
9251 if (!slp_node)
9252 {
9253 vec_oprnds0.create (1);
9254 vec_oprnds1.create (1);
9255 }
9256
9257 /* Handle def. */
9258 lhs = gimple_assign_lhs (stmt);
9259 mask = vect_create_destination_var (lhs, mask_type);
9260
9261 /* Handle cmp expr. */
9262 for (j = 0; j < ncopies; j++)
9263 {
9264 gassign *new_stmt = NULL;
9265 if (j == 0)
9266 {
9267 if (slp_node)
9268 {
9269 auto_vec<tree, 2> ops;
9270 auto_vec<vec<tree>, 2> vec_defs;
9271
9272 ops.safe_push (rhs1);
9273 ops.safe_push (rhs2);
306b0c92 9274 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9275 vec_oprnds1 = vec_defs.pop ();
9276 vec_oprnds0 = vec_defs.pop ();
9277 }
9278 else
9279 {
e4af0bc4
IE
9280 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9281 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9282 }
9283 }
9284 else
9285 {
9286 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9287 vec_oprnds0.pop ());
9288 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9289 vec_oprnds1.pop ());
9290 }
9291
9292 if (!slp_node)
9293 {
9294 vec_oprnds0.quick_push (vec_rhs1);
9295 vec_oprnds1.quick_push (vec_rhs2);
9296 }
9297
9298 /* Arguments are ready. Create the new vector stmt. */
9299 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9300 {
9301 vec_rhs2 = vec_oprnds1[i];
9302
9303 new_temp = make_ssa_name (mask);
49e76ff1
IE
9304 if (bitop1 == NOP_EXPR)
9305 {
9306 new_stmt = gimple_build_assign (new_temp, code,
9307 vec_rhs1, vec_rhs2);
9308 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9309 }
9310 else
9311 {
9312 if (bitop1 == BIT_NOT_EXPR)
9313 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9314 else
9315 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9316 vec_rhs2);
9317 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9318 if (bitop2 != NOP_EXPR)
9319 {
9320 tree res = make_ssa_name (mask);
9321 if (bitop2 == BIT_NOT_EXPR)
9322 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9323 else
9324 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9325 new_temp);
9326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9327 }
9328 }
42fd8198
IE
9329 if (slp_node)
9330 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9331 }
9332
9333 if (slp_node)
9334 continue;
9335
9336 if (j == 0)
9337 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9338 else
9339 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9340
9341 prev_stmt_info = vinfo_for_stmt (new_stmt);
9342 }
9343
9344 vec_oprnds0.release ();
9345 vec_oprnds1.release ();
9346
9347 return true;
9348}
ebfd146a 9349
68a0f2ff
RS
9350/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9351 can handle all live statements in the node. Otherwise return true
9352 if STMT is not live or if vectorizable_live_operation can handle it.
9353 GSI and VEC_STMT are as for vectorizable_live_operation. */
9354
9355static bool
9356can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
9357 slp_tree slp_node, gimple **vec_stmt,
9358 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9359{
9360 if (slp_node)
9361 {
9362 gimple *slp_stmt;
9363 unsigned int i;
9364 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9365 {
9366 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9367 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9368 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
68435eb2 9369 vec_stmt, cost_vec))
68a0f2ff
RS
9370 return false;
9371 }
9372 }
9373 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9374 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9375 cost_vec))
68a0f2ff
RS
9376 return false;
9377
9378 return true;
9379}
9380
8644a673 9381/* Make sure the statement is vectorizable. */
ebfd146a
IR
9382
9383bool
891ad31c 9384vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9385 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9386{
8644a673 9387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9388 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9389 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9390 bool ok;
355fe088 9391 gimple *pattern_stmt;
363477c0 9392 gimple_seq pattern_def_seq;
ebfd146a 9393
73fbfcad 9394 if (dump_enabled_p ())
ebfd146a 9395 {
78c60e3d
SS
9396 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9397 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9398 }
ebfd146a 9399
1825a1f3 9400 if (gimple_has_volatile_ops (stmt))
b8698a0f 9401 {
73fbfcad 9402 if (dump_enabled_p ())
78c60e3d 9403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9404 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9405
9406 return false;
9407 }
b8698a0f
L
9408
9409 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9410 to include:
9411 - the COND_EXPR which is the loop exit condition
9412 - any LABEL_EXPRs in the loop
b8698a0f 9413 - computations that are used only for array indexing or loop control.
8644a673 9414 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9415 instance, therefore, all the statements are relevant.
ebfd146a 9416
d092494c 9417 Pattern statement needs to be analyzed instead of the original statement
83197f37 9418 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9419 statements. In basic blocks we are called from some SLP instance
9420 traversal, don't analyze pattern stmts instead, the pattern stmts
9421 already will be part of SLP instance. */
83197f37
IR
9422
9423 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9424 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9425 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9426 {
9d5e7640 9427 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9428 && pattern_stmt
9d5e7640
IR
9429 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9430 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9431 {
83197f37 9432 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9433 stmt = pattern_stmt;
9434 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9435 if (dump_enabled_p ())
9d5e7640 9436 {
78c60e3d
SS
9437 dump_printf_loc (MSG_NOTE, vect_location,
9438 "==> examining pattern statement: ");
9439 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9440 }
9441 }
9442 else
9443 {
73fbfcad 9444 if (dump_enabled_p ())
e645e942 9445 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9446
9d5e7640
IR
9447 return true;
9448 }
8644a673 9449 }
83197f37 9450 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9451 && node == NULL
83197f37
IR
9452 && pattern_stmt
9453 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9454 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9455 {
9456 /* Analyze PATTERN_STMT too. */
73fbfcad 9457 if (dump_enabled_p ())
83197f37 9458 {
78c60e3d
SS
9459 dump_printf_loc (MSG_NOTE, vect_location,
9460 "==> examining pattern statement: ");
9461 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9462 }
9463
891ad31c 9464 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
68435eb2 9465 node_instance, cost_vec))
83197f37
IR
9466 return false;
9467 }
ebfd146a 9468
1107f3ae 9469 if (is_pattern_stmt_p (stmt_info)
079c527f 9470 && node == NULL
363477c0 9471 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 9472 {
363477c0 9473 gimple_stmt_iterator si;
1107f3ae 9474
363477c0
JJ
9475 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9476 {
355fe088 9477 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
9478 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9479 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9480 {
9481 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 9482 if (dump_enabled_p ())
363477c0 9483 {
78c60e3d
SS
9484 dump_printf_loc (MSG_NOTE, vect_location,
9485 "==> examining pattern def statement: ");
9486 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 9487 }
1107f3ae 9488
363477c0 9489 if (!vect_analyze_stmt (pattern_def_stmt,
68435eb2
RB
9490 need_to_vectorize, node, node_instance,
9491 cost_vec))
363477c0
JJ
9492 return false;
9493 }
9494 }
9495 }
1107f3ae 9496
8644a673
IR
9497 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9498 {
9499 case vect_internal_def:
9500 break;
ebfd146a 9501
8644a673 9502 case vect_reduction_def:
7c5222ff 9503 case vect_nested_cycle:
14a61437
RB
9504 gcc_assert (!bb_vinfo
9505 && (relevance == vect_used_in_outer
9506 || relevance == vect_used_in_outer_by_reduction
9507 || relevance == vect_used_by_reduction
b28ead45
AH
9508 || relevance == vect_unused_in_scope
9509 || relevance == vect_used_only_live));
8644a673
IR
9510 break;
9511
9512 case vect_induction_def:
e7baeb39
RB
9513 gcc_assert (!bb_vinfo);
9514 break;
9515
8644a673
IR
9516 case vect_constant_def:
9517 case vect_external_def:
9518 case vect_unknown_def_type:
9519 default:
9520 gcc_unreachable ();
9521 }
ebfd146a 9522
8644a673 9523 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9524 {
8644a673 9525 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9526 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9527 || (is_gimple_call (stmt)
9528 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9529 *need_to_vectorize = true;
ebfd146a
IR
9530 }
9531
b1af7da6
RB
9532 if (PURE_SLP_STMT (stmt_info) && !node)
9533 {
9534 dump_printf_loc (MSG_NOTE, vect_location,
9535 "handled only by SLP analysis\n");
9536 return true;
9537 }
9538
9539 ok = true;
9540 if (!bb_vinfo
9541 && (STMT_VINFO_RELEVANT_P (stmt_info)
9542 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2
RB
9543 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9544 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9545 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9546 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9547 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9548 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9549 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9550 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9551 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9552 cost_vec)
9553 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9554 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9555 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6
RB
9556 else
9557 {
9558 if (bb_vinfo)
68435eb2
RB
9559 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9560 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9561 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9562 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9563 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9564 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9565 cost_vec)
9566 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9567 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9568 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9569 cost_vec)
9570 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9571 cost_vec));
b1af7da6 9572 }
8644a673
IR
9573
9574 if (!ok)
ebfd146a 9575 {
73fbfcad 9576 if (dump_enabled_p ())
8644a673 9577 {
78c60e3d
SS
9578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9579 "not vectorized: relevant stmt not ");
9580 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9581 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9582 }
b8698a0f 9583
ebfd146a
IR
9584 return false;
9585 }
9586
8644a673
IR
 9587 /* Stmts that are (also) "live" (i.e. used outside the loop)
9588 need extra handling, except for vectorizable reductions. */
68435eb2
RB
9589 if (!bb_vinfo
9590 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9591 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9592 {
73fbfcad 9593 if (dump_enabled_p ())
8644a673 9594 {
78c60e3d 9595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9596 "not vectorized: live stmt not supported: ");
78c60e3d 9597 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9598 }
b8698a0f 9599
8644a673 9600 return false;
ebfd146a
IR
9601 }
9602
ebfd146a
IR
9603 return true;
9604}
9605
9606
9607/* Function vect_transform_stmt.
9608
 9609 Create a vectorized stmt to replace STMT, and insert it at GSI. */
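/* The return value reports whether STMT is a store whose scalar form can
   now be removed by the caller; for a grouped access this becomes true
   only once the last store in the group has been transformed (see the
   grouped-store handling below).  */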
9610
9611bool
355fe088 9612vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9613 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9614 slp_instance slp_node_instance)
9615{
9616 bool is_store = false;
355fe088 9617 gimple *vec_stmt = NULL;
ebfd146a 9618 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9619 bool done;
ebfd146a 9620
fce57248 9621 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9622 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9623
e57d9a82
RB
9624 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9625 && nested_in_vect_loop_p
9626 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9627 stmt));
9628
ebfd146a
IR
9629 switch (STMT_VINFO_TYPE (stmt_info))
9630 {
9631 case type_demotion_vec_info_type:
ebfd146a 9632 case type_promotion_vec_info_type:
ebfd146a 9633 case type_conversion_vec_info_type:
68435eb2 9634 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9635 gcc_assert (done);
9636 break;
9637
9638 case induc_vec_info_type:
68435eb2 9639 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9640 gcc_assert (done);
9641 break;
9642
9dc3f7de 9643 case shift_vec_info_type:
68435eb2 9644 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de
IR
9645 gcc_assert (done);
9646 break;
9647
ebfd146a 9648 case op_vec_info_type:
68435eb2 9649 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9650 gcc_assert (done);
9651 break;
9652
9653 case assignment_vec_info_type:
68435eb2 9654 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9655 gcc_assert (done);
9656 break;
9657
9658 case load_vec_info_type:
b8698a0f 9659 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9660 slp_node_instance, NULL);
ebfd146a
IR
9661 gcc_assert (done);
9662 break;
9663
9664 case store_vec_info_type:
68435eb2 9665 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9666 gcc_assert (done);
0d0293ac 9667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9668 {
9669 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9670 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 9671 one are skipped, and their vec_stmt_info shouldn't be freed
9672 meanwhile. */
0d0293ac 9673 *grouped_store = true;
f307441a 9674 stmt_vec_info group_info
2c53b149
RB
9675 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9676 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9677 is_store = true;
f307441a 9678 }
ebfd146a
IR
9679 else
9680 is_store = true;
9681 break;
9682
9683 case condition_vec_info_type:
68435eb2 9684 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a
IR
9685 gcc_assert (done);
9686 break;
9687
42fd8198 9688 case comparison_vec_info_type:
68435eb2 9689 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198
IE
9690 gcc_assert (done);
9691 break;
9692
ebfd146a 9693 case call_vec_info_type:
68435eb2 9694 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9695 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9696 break;
9697
0136f8f0 9698 case call_simd_clone_vec_info_type:
68435eb2 9699 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0
AH
9700 stmt = gsi_stmt (*gsi);
9701 break;
9702
ebfd146a 9703 case reduc_vec_info_type:
891ad31c 9704 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9705 slp_node_instance, NULL);
ebfd146a
IR
9706 gcc_assert (done);
9707 break;
9708
9709 default:
9710 if (!STMT_VINFO_LIVE_P (stmt_info))
9711 {
73fbfcad 9712 if (dump_enabled_p ())
78c60e3d 9713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9714 "stmt not supported.\n");
ebfd146a
IR
9715 gcc_unreachable ();
9716 }
9717 }
9718
225ce44b
RB
9719 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9720 This would break hybrid SLP vectorization. */
9721 if (slp_node)
d90f8440
RB
9722 gcc_assert (!vec_stmt
9723 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9724
ebfd146a
IR
9725 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9726 is being vectorized, but outside the immediately enclosing loop. */
9727 if (vec_stmt
e57d9a82 9728 && nested_p
ebfd146a
IR
9729 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9730 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9731 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9732 vect_used_in_outer_by_reduction))
ebfd146a 9733 {
a70d6342
IR
9734 struct loop *innerloop = LOOP_VINFO_LOOP (
9735 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9736 imm_use_iterator imm_iter;
9737 use_operand_p use_p;
9738 tree scalar_dest;
355fe088 9739 gimple *exit_phi;
ebfd146a 9740
73fbfcad 9741 if (dump_enabled_p ())
78c60e3d 9742 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9743 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9744
 9745 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9746 (to be used when vectorizing outer-loop stmts that use the DEF of
9747 STMT). */
9748 if (gimple_code (stmt) == GIMPLE_PHI)
9749 scalar_dest = PHI_RESULT (stmt);
9750 else
9751 scalar_dest = gimple_assign_lhs (stmt);
9752
9753 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9754 {
9755 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9756 {
9757 exit_phi = USE_STMT (use_p);
9758 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9759 }
9760 }
9761 }
9762
9763 /* Handle stmts whose DEF is used outside the loop-nest that is
9764 being vectorized. */
68a0f2ff 9765 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9766 {
68435eb2 9767 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a
IR
9768 gcc_assert (done);
9769 }
9770
9771 if (vec_stmt)
83197f37 9772 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9773
b8698a0f 9774 return is_store;
ebfd146a
IR
9775}
9776
9777
b8698a0f 9778/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9779 stmt_vec_info. */
9780
9781void
355fe088 9782vect_remove_stores (gimple *first_stmt)
ebfd146a 9783{
355fe088
TS
9784 gimple *next = first_stmt;
9785 gimple *tmp;
ebfd146a
IR
9786 gimple_stmt_iterator next_si;
9787
9788 while (next)
9789 {
78048b1c
JJ
9790 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9791
2c53b149 9792 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
78048b1c
JJ
9793 if (is_pattern_stmt_p (stmt_info))
9794 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9795 /* Free the attached stmt_vec_info and remove the stmt. */
9796 next_si = gsi_for_stmt (next);
3d3f2249 9797 unlink_stmt_vdef (next);
ebfd146a 9798 gsi_remove (&next_si, true);
3d3f2249 9799 release_defs (next);
ebfd146a
IR
9800 free_stmt_vec_info (next);
9801 next = tmp;
9802 }
9803}
9804
9805
9806/* Function new_stmt_vec_info.
9807
9808 Create and initialize a new stmt_vec_info struct for STMT. */
9809
9810stmt_vec_info
310213d4 9811new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9812{
9813 stmt_vec_info res;
9814 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9815
9816 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9817 STMT_VINFO_STMT (res) = stmt;
310213d4 9818 res->vinfo = vinfo;
8644a673 9819 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9820 STMT_VINFO_LIVE_P (res) = false;
9821 STMT_VINFO_VECTYPE (res) = NULL;
9822 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9823 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9824 STMT_VINFO_IN_PATTERN_P (res) = false;
9825 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9826 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9827 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9828 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9829 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9830
ebfd146a
IR
9831 if (gimple_code (stmt) == GIMPLE_PHI
9832 && is_loop_header_bb_p (gimple_bb (stmt)))
9833 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9834 else
8644a673
IR
9835 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9836
9771b263 9837 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9838 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9839 STMT_VINFO_NUM_SLP_USES (res) = 0;
9840
2c53b149
RB
9841 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9842 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9843 res->size = 0; /* GROUP_SIZE */
9844 res->store_count = 0; /* GROUP_STORE_COUNT */
9845 res->gap = 0; /* GROUP_GAP */
9846 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a
IR
9847
9848 return res;
9849}
9850
9851
f8c0baaf 9852/* Set the current stmt_vec_info vector to V. */
ebfd146a
IR
9853
9854void
f8c0baaf 9855set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9856{
f8c0baaf 9857 stmt_vec_info_vec = v;
ebfd146a
IR
9858}
9859
f8c0baaf 9860/* Free the stmt_vec_info entries in V and release V. */
ebfd146a
IR
9861
9862void
f8c0baaf 9863free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9864{
93675444 9865 unsigned int i;
3161455c 9866 stmt_vec_info info;
f8c0baaf 9867 FOR_EACH_VEC_ELT (*v, i, info)
93675444 9868 if (info != NULL)
3161455c 9869 free_stmt_vec_info (STMT_VINFO_STMT (info));
f8c0baaf
RB
9870 if (v == stmt_vec_info_vec)
9871 stmt_vec_info_vec = NULL;
9872 v->release ();
ebfd146a
IR
9873}
9874
9875
9876/* Free stmt vectorization related info. */
9877
9878void
355fe088 9879free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9880{
9881 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9882
9883 if (!stmt_info)
9884 return;
9885
78048b1c
JJ
9886 /* Check if this statement has a related "pattern stmt"
9887 (introduced by the vectorizer during the pattern recognition
9888 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9889 too. */
9890 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9891 {
9892 stmt_vec_info patt_info
9893 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9894 if (patt_info)
9895 {
363477c0 9896 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9897 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9898 gimple_set_bb (patt_stmt, NULL);
9899 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9900 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9901 release_ssa_name (lhs);
363477c0
JJ
9902 if (seq)
9903 {
9904 gimple_stmt_iterator si;
9905 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9906 {
355fe088 9907 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9908 gimple_set_bb (seq_stmt, NULL);
7532abf2 9909 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9910 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9911 release_ssa_name (lhs);
9912 free_stmt_vec_info (seq_stmt);
9913 }
363477c0 9914 }
f0281fde 9915 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9916 }
9917 }
9918
9771b263 9919 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9920 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9921 set_vinfo_for_stmt (stmt, NULL);
9922 free (stmt_info);
9923}
9924
9925
bb67d9c7 9926/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9927
bb67d9c7 9928 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9929 by the target. */
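/* Illustrative sketch (the exact modes are target-dependent): on a target
   whose preferred SIMD mode for SImode is V4SImode,

     tree v = get_vectype_for_scalar_type_and_size (intSI_type_node, 0);

   returns a 4-element vector of int, whereas passing a SIZE of 32 bytes
   would instead look for an 8-element SImode vector mode and return
   NULL_TREE if the target provides none.  */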
9930
c803b2a9 9931tree
86e36728 9932get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9933{
c7d97b28 9934 tree orig_scalar_type = scalar_type;
3bd8f481 9935 scalar_mode inner_mode;
ef4bddc2 9936 machine_mode simd_mode;
86e36728 9937 poly_uint64 nunits;
ebfd146a
IR
9938 tree vectype;
9939
3bd8f481
RS
9940 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9941 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9942 return NULL_TREE;
9943
3bd8f481 9944 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9945
7b7b1813
RG
9946 /* For vector types of elements whose mode precision doesn't
 9947 match their type's precision, we use an element type of mode
9948 precision. The vectorization routines will have to make sure
48f2e373
RB
9949 they support the proper result truncation/extension.
9950 We also make sure to build vector types with INTEGER_TYPE
9951 component type only. */
6d7971b8 9952 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9953 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9954 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9955 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9956 TYPE_UNSIGNED (scalar_type));
6d7971b8 9957
ccbf5bb4
RG
9958 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9959 When the component mode passes the above test simply use a type
9960 corresponding to that mode. The theory is that any use that
9961 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9962 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9963 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9964 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9965
9966 /* We can't build a vector type of elements with alignment bigger than
9967 their size. */
dfc2e2ac 9968 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9969 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9970 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9971
dfc2e2ac
RB
 9972 /* If we fell back to using the mode, fail if there was
9973 no scalar type for it. */
9974 if (scalar_type == NULL_TREE)
9975 return NULL_TREE;
9976
bb67d9c7
RG
9977 /* If no size was supplied use the mode the target prefers. Otherwise
9978 lookup a vector mode of the specified size. */
86e36728 9979 if (known_eq (size, 0U))
bb67d9c7 9980 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9981 else if (!multiple_p (size, nbytes, &nunits)
9982 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9983 return NULL_TREE;
4c8fd8ac 9984 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9985 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9986 return NULL_TREE;
ebfd146a
IR
9987
9988 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9989
9990 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9991 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9992 return NULL_TREE;
ebfd146a 9993
c7d97b28
RB
9994 /* Re-attach the address-space qualifier if we canonicalized the scalar
9995 type. */
9996 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9997 return build_qualified_type
9998 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9999
ebfd146a
IR
10000 return vectype;
10001}
10002
86e36728 10003poly_uint64 current_vector_size;
bb67d9c7
RG
10004
10005/* Function get_vectype_for_scalar_type.
10006
10007 Returns the vector type corresponding to SCALAR_TYPE as supported
10008 by the target. */
10009
10010tree
10011get_vectype_for_scalar_type (tree scalar_type)
10012{
10013 tree vectype;
10014 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10015 current_vector_size);
10016 if (vectype
86e36728 10017 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
10018 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10019 return vectype;
10020}
10021
42fd8198
IE
10022/* Function get_mask_type_for_scalar_type.
10023
10024 Returns the mask type corresponding to a result of comparison
10025 of vectors of specified SCALAR_TYPE as supported by target. */
10026
10027tree
10028get_mask_type_for_scalar_type (tree scalar_type)
10029{
10030 tree vectype = get_vectype_for_scalar_type (scalar_type);
10031
10032 if (!vectype)
10033 return NULL;
10034
10035 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10036 current_vector_size);
10037}
10038
b690cc0f
RG
10039/* Function get_same_sized_vectype
10040
10041 Returns a vector type corresponding to SCALAR_TYPE of size
10042 VECTOR_TYPE if supported by the target. */
10043
10044tree
bb67d9c7 10045get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10046{
2568d8a1 10047 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10048 return build_same_sized_truth_vector_type (vector_type);
10049
bb67d9c7
RG
10050 return get_vectype_for_scalar_type_and_size
10051 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10052}
10053
ebfd146a
IR
10054/* Function vect_is_simple_use.
10055
10056 Input:
81c40241
RB
10057 VINFO - the vect info of the loop or basic block that is being vectorized.
10058 OPERAND - operand in the loop or bb.
10059 Output:
10060 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
10061 DT - the type of definition
ebfd146a
IR
10062
10063 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10064 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10065 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10066 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
10067 is the case in reduction/induction computations).
10068 For basic blocks, supportable operands are constants and bb invariants.
10069 For now, operands defined outside the basic block are not supported. */
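/* A small illustrative case: for a loop statement x_1 = y_2 + 3, the
   operand 3 gives *DT == vect_constant_def; y_2 defined by a statement
   inside the vectorized region gives the def type recorded for it
   (e.g. vect_internal_def, or vect_induction_def for an induction
   variable); y_2 defined before the loop gives vect_external_def.  */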
ebfd146a
IR
10070
10071bool
81c40241
RB
10072vect_is_simple_use (tree operand, vec_info *vinfo,
10073 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 10074{
ebfd146a 10075 *def_stmt = NULL;
3fc356dc 10076 *dt = vect_unknown_def_type;
b8698a0f 10077
73fbfcad 10078 if (dump_enabled_p ())
ebfd146a 10079 {
78c60e3d
SS
10080 dump_printf_loc (MSG_NOTE, vect_location,
10081 "vect_is_simple_use: operand ");
10082 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 10083 dump_printf (MSG_NOTE, "\n");
ebfd146a 10084 }
b8698a0f 10085
b758f602 10086 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
10087 {
10088 *dt = vect_constant_def;
10089 return true;
10090 }
b8698a0f 10091
ebfd146a
IR
10092 if (is_gimple_min_invariant (operand))
10093 {
8644a673 10094 *dt = vect_external_def;
ebfd146a
IR
10095 return true;
10096 }
10097
ebfd146a
IR
10098 if (TREE_CODE (operand) != SSA_NAME)
10099 {
73fbfcad 10100 if (dump_enabled_p ())
af29617a
AH
10101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10102 "not ssa-name.\n");
ebfd146a
IR
10103 return false;
10104 }
b8698a0f 10105
3fc356dc 10106 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 10107 {
3fc356dc
RB
10108 *dt = vect_external_def;
10109 return true;
ebfd146a
IR
10110 }
10111
3fc356dc 10112 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 10113 if (dump_enabled_p ())
ebfd146a 10114 {
78c60e3d
SS
10115 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
10116 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
10117 }
10118
61d371eb 10119 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 10120 *dt = vect_external_def;
ebfd146a
IR
10121 else
10122 {
3fc356dc 10123 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 10124 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
10125 }
10126
2e8ab70c
RB
10127 if (dump_enabled_p ())
10128 {
10129 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10130 switch (*dt)
10131 {
10132 case vect_uninitialized_def:
10133 dump_printf (MSG_NOTE, "uninitialized\n");
10134 break;
10135 case vect_constant_def:
10136 dump_printf (MSG_NOTE, "constant\n");
10137 break;
10138 case vect_external_def:
10139 dump_printf (MSG_NOTE, "external\n");
10140 break;
10141 case vect_internal_def:
10142 dump_printf (MSG_NOTE, "internal\n");
10143 break;
10144 case vect_induction_def:
10145 dump_printf (MSG_NOTE, "induction\n");
10146 break;
10147 case vect_reduction_def:
10148 dump_printf (MSG_NOTE, "reduction\n");
10149 break;
10150 case vect_double_reduction_def:
10151 dump_printf (MSG_NOTE, "double reduction\n");
10152 break;
10153 case vect_nested_cycle:
10154 dump_printf (MSG_NOTE, "nested cycle\n");
10155 break;
10156 case vect_unknown_def_type:
10157 dump_printf (MSG_NOTE, "unknown\n");
10158 break;
10159 }
10160 }
10161
81c40241 10162 if (*dt == vect_unknown_def_type)
ebfd146a 10163 {
73fbfcad 10164 if (dump_enabled_p ())
78c60e3d 10165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10166 "Unsupported pattern.\n");
ebfd146a
IR
10167 return false;
10168 }
10169
ebfd146a
IR
10170 switch (gimple_code (*def_stmt))
10171 {
10172 case GIMPLE_PHI:
ebfd146a 10173 case GIMPLE_ASSIGN:
ebfd146a 10174 case GIMPLE_CALL:
81c40241 10175 break;
ebfd146a 10176 default:
73fbfcad 10177 if (dump_enabled_p ())
78c60e3d 10178 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10179 "unsupported defining stmt:\n");
ebfd146a
IR
10180 return false;
10181 }
10182
10183 return true;
10184}
10185
81c40241 10186/* Function vect_is_simple_use.
b690cc0f 10187
81c40241 10188 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10189 type of OPERAND and stores it to *VECTYPE. If the definition of
10190 OPERAND is vect_uninitialized_def, vect_constant_def or
10191 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
 10192 is responsible for computing the best suited vector type for the
10193 scalar operand. */
10194
10195bool
81c40241
RB
10196vect_is_simple_use (tree operand, vec_info *vinfo,
10197 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 10198{
81c40241 10199 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
10200 return false;
10201
10202 /* Now get a vector type if the def is internal, otherwise supply
10203 NULL_TREE and leave it up to the caller to figure out a proper
10204 type for the use stmt. */
10205 if (*dt == vect_internal_def
10206 || *dt == vect_induction_def
10207 || *dt == vect_reduction_def
10208 || *dt == vect_double_reduction_def
10209 || *dt == vect_nested_cycle)
10210 {
10211 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
10212
10213 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10214 && !STMT_VINFO_RELEVANT (stmt_info)
10215 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 10216 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 10217
b690cc0f
RG
10218 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10219 gcc_assert (*vectype != NULL_TREE);
10220 }
10221 else if (*dt == vect_uninitialized_def
10222 || *dt == vect_constant_def
10223 || *dt == vect_external_def)
10224 *vectype = NULL_TREE;
10225 else
10226 gcc_unreachable ();
10227
10228 return true;
10229}
10230
ebfd146a
IR
10231
10232/* Function supportable_widening_operation
10233
b8698a0f
L
10234 Check whether an operation represented by the code CODE is a
10235 widening operation that is supported by the target platform in
b690cc0f
RG
10236 vector form (i.e., when operating on arguments of type VECTYPE_IN
10237 producing a result of type VECTYPE_OUT).
b8698a0f 10238
1bda738b
JJ
10239 Widening operations we currently support are NOP (CONVERT), FLOAT,
10240 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10241 are supported by the target platform either directly (via vector
10242 tree-codes), or via target builtins.
ebfd146a
IR
10243
10244 Output:
b8698a0f
L
10245 - CODE1 and CODE2 are codes of vector operations to be used when
10246 vectorizing the operation, if available.
ebfd146a
IR
10247 - MULTI_STEP_CVT determines the number of required intermediate steps in
10248 case of multi-step conversion (like char->short->int - in that case
10249 MULTI_STEP_CVT will be 1).
b8698a0f
L
10250 - INTERM_TYPES contains the intermediate type required to perform the
10251 widening operation (short in the above example). */
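/* For instance (a sketch; the actual modes depend on the target): widening
   chars to ints on a 128-bit target goes char -> short -> int, so *CODE1
   and *CODE2 are VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR, *MULTI_STEP_CVT
   is 1 and *INTERM_TYPES holds the intermediate short vector type.  */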
ebfd146a
IR
10252
10253bool
355fe088 10254supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10255 tree vectype_out, tree vectype_in,
ebfd146a
IR
10256 enum tree_code *code1, enum tree_code *code2,
10257 int *multi_step_cvt,
9771b263 10258 vec<tree> *interm_types)
ebfd146a
IR
10259{
10260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10261 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10262 struct loop *vect_loop = NULL;
ef4bddc2 10263 machine_mode vec_mode;
81f40b79 10264 enum insn_code icode1, icode2;
ebfd146a 10265 optab optab1, optab2;
b690cc0f
RG
10266 tree vectype = vectype_in;
10267 tree wide_vectype = vectype_out;
ebfd146a 10268 enum tree_code c1, c2;
4a00c761
JJ
10269 int i;
10270 tree prev_type, intermediate_type;
ef4bddc2 10271 machine_mode intermediate_mode, prev_mode;
4a00c761 10272 optab optab3, optab4;
ebfd146a 10273
4a00c761 10274 *multi_step_cvt = 0;
4ef69dfc
IR
10275 if (loop_info)
10276 vect_loop = LOOP_VINFO_LOOP (loop_info);
10277
ebfd146a
IR
10278 switch (code)
10279 {
10280 case WIDEN_MULT_EXPR:
6ae6116f
RH
10281 /* The result of a vectorized widening operation usually requires
10282 two vectors (because the widened results do not fit into one vector).
10283 The generated vector results would normally be expected to be
10284 generated in the same order as in the original scalar computation,
10285 i.e. if 8 results are generated in each vector iteration, they are
10286 to be organized as follows:
10287 vect1: [res1,res2,res3,res4],
10288 vect2: [res5,res6,res7,res8].
10289
10290 However, in the special case that the result of the widening
10291 operation is used in a reduction computation only, the order doesn't
10292 matter (because when vectorizing a reduction we change the order of
10293 the computation). Some targets can take advantage of this and
10294 generate more efficient code. For example, targets like Altivec,
10295 that support widen_mult using a sequence of {mult_even,mult_odd}
10296 generate the following vectors:
10297 vect1: [res1,res3,res5,res7],
10298 vect2: [res2,res4,res6,res8].
10299
10300 When vectorizing outer-loops, we execute the inner-loop sequentially
10301 (each vectorized inner-loop iteration contributes to VF outer-loop
 10302 iterations in parallel). We therefore don't allow changing the
10303 order of the computation in the inner-loop during outer-loop
10304 vectorization. */
10305 /* TODO: Another case in which order doesn't *really* matter is when we
10306 widen and then contract again, e.g. (short)((int)x * y >> 8).
10307 Normally, pack_trunc performs an even/odd permute, whereas the
10308 repack from an even/odd expansion would be an interleave, which
10309 would be significantly simpler for e.g. AVX2. */
10310 /* In any case, in order to avoid duplicating the code below, recurse
10311 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10312 are properly set up for the caller. If we fail, we'll continue with
10313 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10314 if (vect_loop
10315 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10316 && !nested_in_vect_loop_p (vect_loop, stmt)
10317 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10318 stmt, vectype_out, vectype_in,
a86ec597
RH
10319 code1, code2, multi_step_cvt,
10320 interm_types))
ebc047a2
CH
10321 {
10322 /* Elements in a vector with vect_used_by_reduction property cannot
10323 be reordered if the use chain with this property does not have the
 10324 same operation. One such example is s += a * b, where elements
10325 in a and b cannot be reordered. Here we check if the vector defined
10326 by STMT is only directly used in the reduction statement. */
10327 tree lhs = gimple_assign_lhs (stmt);
10328 use_operand_p dummy;
355fe088 10329 gimple *use_stmt;
ebc047a2
CH
10330 stmt_vec_info use_stmt_info = NULL;
10331 if (single_imm_use (lhs, &dummy, &use_stmt)
10332 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10333 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10334 return true;
10335 }
4a00c761
JJ
10336 c1 = VEC_WIDEN_MULT_LO_EXPR;
10337 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10338 break;
10339
81c40241
RB
10340 case DOT_PROD_EXPR:
10341 c1 = DOT_PROD_EXPR;
10342 c2 = DOT_PROD_EXPR;
10343 break;
10344
10345 case SAD_EXPR:
10346 c1 = SAD_EXPR;
10347 c2 = SAD_EXPR;
10348 break;
10349
6ae6116f
RH
10350 case VEC_WIDEN_MULT_EVEN_EXPR:
10351 /* Support the recursion induced just above. */
10352 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10353 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10354 break;
10355
36ba4aae 10356 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10357 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10358 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10359 break;
10360
ebfd146a 10361 CASE_CONVERT:
4a00c761
JJ
10362 c1 = VEC_UNPACK_LO_EXPR;
10363 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10364 break;
10365
10366 case FLOAT_EXPR:
4a00c761
JJ
10367 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10368 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10369 break;
10370
10371 case FIX_TRUNC_EXPR:
1bda738b
JJ
10372 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10373 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10374 break;
ebfd146a
IR
10375
10376 default:
10377 gcc_unreachable ();
10378 }
10379
6ae6116f 10380 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10381 std::swap (c1, c2);
4a00c761 10382
ebfd146a
IR
10383 if (code == FIX_TRUNC_EXPR)
10384 {
10385 /* The signedness is determined from output operand. */
b690cc0f
RG
10386 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10387 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10388 }
10389 else
10390 {
10391 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10392 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10393 }
10394
10395 if (!optab1 || !optab2)
10396 return false;
10397
10398 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10399 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10400 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10401 return false;
10402
4a00c761
JJ
10403 *code1 = c1;
10404 *code2 = c2;
10405
10406 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10407 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10408 /* For scalar masks we may have different boolean
10409 vector types having the same QImode. Thus we
10410 add additional check for elements number. */
10411 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10412 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10413 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10414
b8698a0f 10415 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10416 types. */
ebfd146a 10417
4a00c761
JJ
10418 prev_type = vectype;
10419 prev_mode = vec_mode;
b8698a0f 10420
4a00c761
JJ
10421 if (!CONVERT_EXPR_CODE_P (code))
10422 return false;
b8698a0f 10423
4a00c761
JJ
10424 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 10425 intermediate steps in the promotion sequence. We try
10426 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10427 not. */
9771b263 10428 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10429 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10430 {
10431 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10432 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10433 {
7cfb4d93 10434 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10435 if (intermediate_mode != TYPE_MODE (intermediate_type))
10436 return false;
10437 }
10438 else
10439 intermediate_type
10440 = lang_hooks.types.type_for_mode (intermediate_mode,
10441 TYPE_UNSIGNED (prev_type));
10442
4a00c761
JJ
10443 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10444 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10445
10446 if (!optab3 || !optab4
10447 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10448 || insn_data[icode1].operand[0].mode != intermediate_mode
10449 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10450 || insn_data[icode2].operand[0].mode != intermediate_mode
10451 || ((icode1 = optab_handler (optab3, intermediate_mode))
10452 == CODE_FOR_nothing)
10453 || ((icode2 = optab_handler (optab4, intermediate_mode))
10454 == CODE_FOR_nothing))
10455 break;
ebfd146a 10456
9771b263 10457 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10458 (*multi_step_cvt)++;
10459
10460 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10461 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10462 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10463 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10464 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10465
10466 prev_type = intermediate_type;
10467 prev_mode = intermediate_mode;
ebfd146a
IR
10468 }
10469
9771b263 10470 interm_types->release ();
4a00c761 10471 return false;
ebfd146a
IR
10472}
10473
10474
10475/* Function supportable_narrowing_operation
10476
b8698a0f
L
10477 Check whether an operation represented by the code CODE is a
10478 narrowing operation that is supported by the target platform in
b690cc0f
RG
10479 vector form (i.e., when operating on arguments of type VECTYPE_IN
10480 and producing a result of type VECTYPE_OUT).
b8698a0f 10481
1bda738b
JJ
10482 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10483 and FLOAT. This function checks if these operations are supported by
ebfd146a
IR
10484 the target platform directly via vector tree-codes.
10485
10486 Output:
b8698a0f
L
10487 - CODE1 is the code of a vector operation to be used when
10488 vectorizing the operation, if available.
ebfd146a
IR
10489 - MULTI_STEP_CVT determines the number of required intermediate steps in
10490 case of multi-step conversion (like int->short->char - in that case
10491 MULTI_STEP_CVT will be 1).
10492 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10493 narrowing operation (short in the above example). */
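/* For instance (a sketch; the actual modes depend on the target): narrowing
   ints to chars goes int -> short -> char, so *CODE1 is VEC_PACK_TRUNC_EXPR,
   *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the intermediate short
   vector type, mirroring the example above.  */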
ebfd146a
IR
10494
10495bool
10496supportable_narrowing_operation (enum tree_code code,
b690cc0f 10497 tree vectype_out, tree vectype_in,
ebfd146a 10498 enum tree_code *code1, int *multi_step_cvt,
9771b263 10499 vec<tree> *interm_types)
ebfd146a 10500{
ef4bddc2 10501 machine_mode vec_mode;
ebfd146a
IR
10502 enum insn_code icode1;
10503 optab optab1, interm_optab;
b690cc0f
RG
10504 tree vectype = vectype_in;
10505 tree narrow_vectype = vectype_out;
ebfd146a 10506 enum tree_code c1;
3ae0661a 10507 tree intermediate_type, prev_type;
ef4bddc2 10508 machine_mode intermediate_mode, prev_mode;
ebfd146a 10509 int i;
4a00c761 10510 bool uns;
ebfd146a 10511
4a00c761 10512 *multi_step_cvt = 0;
ebfd146a
IR
10513 switch (code)
10514 {
10515 CASE_CONVERT:
10516 c1 = VEC_PACK_TRUNC_EXPR;
10517 break;
10518
10519 case FIX_TRUNC_EXPR:
10520 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10521 break;
10522
10523 case FLOAT_EXPR:
1bda738b
JJ
10524 c1 = VEC_PACK_FLOAT_EXPR;
10525 break;
ebfd146a
IR
10526
10527 default:
10528 gcc_unreachable ();
10529 }
10530
10531 if (code == FIX_TRUNC_EXPR)
10532 /* The signedness is determined from output operand. */
b690cc0f 10533 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10534 else
10535 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10536
10537 if (!optab1)
10538 return false;
10539
10540 vec_mode = TYPE_MODE (vectype);
947131ba 10541 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10542 return false;
10543
4a00c761
JJ
10544 *code1 = c1;
10545
10546 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10547 /* For scalar masks we may have different boolean
10548 vector types having the same QImode. Thus we
10549 add additional check for elements number. */
10550 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10551 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10552 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10553
1bda738b
JJ
10554 if (code == FLOAT_EXPR)
10555 return false;
10556
ebfd146a
IR
10557 /* Check if it's a multi-step conversion that can be done using intermediate
10558 types. */
4a00c761 10559 prev_mode = vec_mode;
3ae0661a 10560 prev_type = vectype;
4a00c761
JJ
10561 if (code == FIX_TRUNC_EXPR)
10562 uns = TYPE_UNSIGNED (vectype_out);
10563 else
10564 uns = TYPE_UNSIGNED (vectype);
10565
10566 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10567 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10568 costly than signed. */
10569 if (code == FIX_TRUNC_EXPR && uns)
10570 {
10571 enum insn_code icode2;
10572
10573 intermediate_type
10574 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10575 interm_optab
10576 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10577 if (interm_optab != unknown_optab
4a00c761
JJ
10578 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10579 && insn_data[icode1].operand[0].mode
10580 == insn_data[icode2].operand[0].mode)
10581 {
10582 uns = false;
10583 optab1 = interm_optab;
10584 icode1 = icode2;
10585 }
10586 }
ebfd146a 10587
4a00c761
JJ
10588 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 10589 intermediate steps in the narrowing sequence. We try
10590 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10591 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10592 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10593 {
10594 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10595 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10596 {
7cfb4d93 10597 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10598 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10599 return false;
3ae0661a
IE
10600 }
10601 else
10602 intermediate_type
10603 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10604 interm_optab
10605 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10606 optab_default);
10607 if (!interm_optab
10608 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10609 || insn_data[icode1].operand[0].mode != intermediate_mode
10610 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10611 == CODE_FOR_nothing))
10612 break;
10613
9771b263 10614 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10615 (*multi_step_cvt)++;
10616
10617 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10618 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10619 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10620 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10621
10622 prev_mode = intermediate_mode;
3ae0661a 10623 prev_type = intermediate_type;
4a00c761 10624 optab1 = interm_optab;
ebfd146a
IR
10625 }
10626
9771b263 10627 interm_types->release ();
4a00c761 10628 return false;
ebfd146a 10629}
7cfb4d93
RS
10630
10631/* Generate and return a statement that sets vector mask MASK such that
10632 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
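/* For example (illustrative only): with START_INDEX = 3, END_INDEX = 6 and
   an 8-element mask, the emitted IFN_WHILE_ULT call sets MASK to
   {1, 1, 1, 0, 0, 0, 0, 0}: element I is true as long as 3 + I < 6.  */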
10633
10634gcall *
10635vect_gen_while (tree mask, tree start_index, tree end_index)
10636{
10637 tree cmp_type = TREE_TYPE (start_index);
10638 tree mask_type = TREE_TYPE (mask);
10639 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10640 cmp_type, mask_type,
10641 OPTIMIZE_FOR_SPEED));
10642 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10643 start_index, end_index,
10644 build_zero_cst (mask_type));
10645 gimple_call_set_lhs (call, mask);
10646 return call;
10647}
535e7c11
RS
10648
10649/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10650 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10651
10652tree
10653vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10654 tree end_index)
10655{
10656 tree tmp = make_ssa_name (mask_type);
10657 gcall *call = vect_gen_while (tmp, start_index, end_index);
10658 gimple_seq_add_stmt (seq, call);
10659 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10660}
1f3cb663
RS
10661
10662/* Try to compute the vector types required to vectorize STMT_INFO,
10663 returning true on success and false if vectorization isn't possible.
10664
10665 On success:
10666
10667 - Set *STMT_VECTYPE_OUT to:
10668 - NULL_TREE if the statement doesn't need to be vectorized;
10669 - boolean_type_node if the statement is a boolean operation whose
10670 vector type can only be determined once all the other vector types
10671 are known; and
10672 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10673
10674 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10675 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10676 statement does not help to determine the overall number of units. */
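/* As an illustration (assuming a 128-bit target, so only a sketch): for an
   assignment int_x = (int) char_y, *STMT_VECTYPE_OUT would be the int
   vector type (V4SI) while *NUNITS_VECTYPE_OUT would be the vector type of
   the smallest scalar type involved (V16QI for char), which is what
   determines the number of units.  */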
10677
10678bool
10679vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10680 tree *stmt_vectype_out,
10681 tree *nunits_vectype_out)
10682{
10683 gimple *stmt = stmt_info->stmt;
10684
10685 *stmt_vectype_out = NULL_TREE;
10686 *nunits_vectype_out = NULL_TREE;
10687
10688 if (gimple_get_lhs (stmt) == NULL_TREE
10689 /* MASK_STORE has no lhs, but is ok. */
10690 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10691 {
10692 if (is_a <gcall *> (stmt))
10693 {
10694 /* Ignore calls with no lhs. These must be calls to
10695 #pragma omp simd functions, and what vectorization factor
10696 it really needs can't be determined until
10697 vectorizable_simd_clone_call. */
10698 if (dump_enabled_p ())
10699 dump_printf_loc (MSG_NOTE, vect_location,
10700 "defer to SIMD clone analysis.\n");
10701 return true;
10702 }
10703
10704 if (dump_enabled_p ())
10705 {
10706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10707 "not vectorized: irregular stmt.");
10708 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10709 }
10710 return false;
10711 }
10712
10713 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10714 {
10715 if (dump_enabled_p ())
10716 {
10717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10718 "not vectorized: vector stmt in loop:");
10719 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10720 }
10721 return false;
10722 }
10723
10724 tree vectype;
10725 tree scalar_type = NULL_TREE;
10726 if (STMT_VINFO_VECTYPE (stmt_info))
10727 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10728 else
10729 {
10730 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10731 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10732 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10733 else
10734 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10735
10736 /* Pure bool ops don't participate in number-of-units computation.
10737 For comparisons use the types being compared. */
10738 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10739 && is_gimple_assign (stmt)
10740 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10741 {
10742 *stmt_vectype_out = boolean_type_node;
10743
10744 tree rhs1 = gimple_assign_rhs1 (stmt);
10745 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10746 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10747 scalar_type = TREE_TYPE (rhs1);
10748 else
10749 {
10750 if (dump_enabled_p ())
10751 dump_printf_loc (MSG_NOTE, vect_location,
10752 "pure bool operation.\n");
10753 return true;
10754 }
10755 }
10756
10757 if (dump_enabled_p ())
10758 {
10759 dump_printf_loc (MSG_NOTE, vect_location,
10760 "get vectype for scalar type: ");
10761 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10762 dump_printf (MSG_NOTE, "\n");
10763 }
10764 vectype = get_vectype_for_scalar_type (scalar_type);
10765 if (!vectype)
10766 {
10767 if (dump_enabled_p ())
10768 {
10769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10770 "not vectorized: unsupported data-type ");
10771 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10772 scalar_type);
10773 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10774 }
10775 return false;
10776 }
10777
10778 if (!*stmt_vectype_out)
10779 *stmt_vectype_out = vectype;
10780
10781 if (dump_enabled_p ())
10782 {
10783 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10784 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10785 dump_printf (MSG_NOTE, "\n");
10786 }
10787 }
10788
10789 /* Don't try to compute scalar types if the stmt produces a boolean
10790 vector; use the existing vector type instead. */
10791 tree nunits_vectype;
10792 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10793 nunits_vectype = vectype;
10794 else
10795 {
10796 /* The number of units is set according to the smallest scalar
10797 type (or the largest vector size, but we only support one
10798 vector size per vectorization). */
10799 if (*stmt_vectype_out != boolean_type_node)
10800 {
10801 HOST_WIDE_INT dummy;
10802 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10803 }
10804 if (dump_enabled_p ())
10805 {
10806 dump_printf_loc (MSG_NOTE, vect_location,
10807 "get vectype for scalar type: ");
10808 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10809 dump_printf (MSG_NOTE, "\n");
10810 }
10811 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10812 }
10813 if (!nunits_vectype)
10814 {
10815 if (dump_enabled_p ())
10816 {
10817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10818 "not vectorized: unsupported data-type ");
10819 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10820 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10821 }
10822 return false;
10823 }
10824
10825 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10826 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10827 {
10828 if (dump_enabled_p ())
10829 {
10830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10831 "not vectorized: different sized vector "
10832 "types in statement, ");
10833 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10834 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10835 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10836 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10837 }
10838 return false;
10839 }
10840
10841 if (dump_enabled_p ())
10842 {
10843 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10844 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10845 dump_printf (MSG_NOTE, "\n");
10846
10847 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10848 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10849 dump_printf (MSG_NOTE, "\n");
10850 }
10851
10852 *nunits_vectype_out = nunits_vectype;
10853 return true;
10854}
10855
10856/* Try to determine the correct vector type for STMT_INFO, which is a
10857 statement that produces a scalar boolean result. Return the vector
10858 type on success, otherwise return NULL_TREE. */
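/* For example (illustrative only): for a comparison flag_1 = a_2 > b_3 of
   two ints, the mask type is the boolean vector type corresponding to the
   int vector type (via get_mask_type_for_scalar_type); for a purely boolean
   operation such as flag_1 = c_4 & d_5 it is instead derived from the
   vector types of the operands.  */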
10859
10860tree
10861vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10862{
10863 gimple *stmt = stmt_info->stmt;
10864 tree mask_type = NULL;
10865 tree vectype, scalar_type;
10866
10867 if (is_gimple_assign (stmt)
10868 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10869 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10870 {
10871 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10872 mask_type = get_mask_type_for_scalar_type (scalar_type);
10873
10874 if (!mask_type)
10875 {
10876 if (dump_enabled_p ())
10877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10878 "not vectorized: unsupported mask\n");
10879 return NULL_TREE;
10880 }
10881 }
10882 else
10883 {
10884 tree rhs;
10885 ssa_op_iter iter;
10886 gimple *def_stmt;
10887 enum vect_def_type dt;
10888
10889 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10890 {
10891 if (!vect_is_simple_use (rhs, stmt_info->vinfo,
10892 &def_stmt, &dt, &vectype))
10893 {
10894 if (dump_enabled_p ())
10895 {
10896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10897 "not vectorized: can't compute mask type "
10898 "for statement, ");
10899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10900 0);
10901 }
10902 return NULL_TREE;
10903 }
10904
10905 /* No vectype probably means external definition.
10906 Allow it in case there is another operand which
 10907 allows us to determine the mask type. */
10908 if (!vectype)
10909 continue;
10910
10911 if (!mask_type)
10912 mask_type = vectype;
10913 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10914 TYPE_VECTOR_SUBPARTS (vectype)))
10915 {
10916 if (dump_enabled_p ())
10917 {
10918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10919 "not vectorized: different sized masks "
10920 "types in statement, ");
10921 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10922 mask_type);
10923 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10924 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10925 vectype);
10926 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10927 }
10928 return NULL_TREE;
10929 }
10930 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10931 != VECTOR_BOOLEAN_TYPE_P (vectype))
10932 {
10933 if (dump_enabled_p ())
10934 {
10935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10936 "not vectorized: mixed mask and "
10937 "nonmask vector types in statement, ");
10938 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10939 mask_type);
10940 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10941 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10942 vectype);
10943 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10944 }
10945 return NULL_TREE;
10946 }
10947 }
10948
 10949 /* We may compare a boolean value loaded as a vector of integers.
 10950 Fix mask_type in such a case. */
10951 if (mask_type
10952 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10953 && gimple_code (stmt) == GIMPLE_ASSIGN
10954 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10955 mask_type = build_same_sized_truth_vector_type (mask_type);
10956 }
10957
 10958 /* No mask_type should mean a loop-invariant predicate.
10959 This is probably a subject for optimization in if-conversion. */
10960 if (!mask_type && dump_enabled_p ())
10961 {
10962 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10963 "not vectorized: can't compute mask type "
10964 "for statement, ");
10965 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10966 }
10967 return mask_type;
10968}