/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

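/* A typical use, as in the vect_model_* routines below: a vectorizable
   operation that will emit NCOPIES plain vector statements in the loop
   body records them with

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				      stmt_info, 0, vect_body);

   (illustrative only; see vect_model_simple_cost for an actual call).  */
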
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

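/* For example, for a masked internal store IFN_MASK_STORE (ptr, align,
   mask, value), the MASK and VALUE arguments are real vector uses handled
   above, whereas PTR only feeds the address computation that the data
   reference describes.  */
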
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
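
/* In short: for cross-nest uses the relevance seen at STMT is translated
   before being recorded on DEF_STMT.  E.g. in case 3b an inner-loop def
   reaching a vect_used_by_reduction or vect_used_only_live outer-loop stmt
   is recorded as vect_used_in_outer_by_reduction.  */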


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}
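
/* For example, for a group of two constant operands {a, b} and V4SI
   vectors, every vector is the repeated whole group {a, b, a, b} (4 is a
   multiple of 2), so the else branch above costs the construction once.  */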

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  gimple *def_stmt;
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
				       vec_promote_demote, stmt_info, 0,
				       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
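
/* For instance, a single-step promotion (PWR = 0) costs vect_pow2 (1) == 2
   vec_promote_demote stmts per copy (the hi/lo unpacks), whereas a
   single-step demotion costs vect_pow2 (0) == 1 (the pack).  */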

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
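
/* E.g. an interleaved store of a group of size 4 needs
   ceil_log2 (4) * 4 = 8 hi/lo interleaves per copy, so the permute cost
   above is nstmts = ncopies * 8 vec_perm stmts.  */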


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      gimple *stmt = GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (GROUP_SIZE (stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < GROUP_SIZE (stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }
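
  /* When the load is permuted, the loop above recounts NCOPIES as one
     vector load per vector-sized chunk of the group with at least one
     used lane; e.g. a permutation using only lanes 0 and 1 of an
     8-element group with 4-lane vectors leaves NCOPIES == 1.  */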

  /* ??? Need to transition load permutation (and load cost) handling
     from vect_analyze_slp_cost_1 to here.  */

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

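/* For example, broadcasting the invariant 5 for a V4SI operand emits an
   init stmt along the lines of
     cst_ = { 5, 5, 5, 5 };
   in the loop preheader (when GSI is NULL) and returns its LHS.  */
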
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
1554
1555
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify the required type of
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}

/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT did.  */

void
vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_replace (&gsi, vec_stmt, false);

  vect_finish_stmt_generation_1 (stmt, vec_stmt);
}

/* Function vect_finish_stmt_generation.

   Insert the new stmt VEC_STMT before GSI while vectorizing STMT,
   keeping virtual SSA form up to date.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (stmt, vec_stmt);
}

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}
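
/* For example (an illustrative case, not an exhaustive list): a call to
   sqrtf maps to CFN_BUILT_IN_SQRTF, whose associated internal function
   is IFN_SQRT.  With VECTYPE_OUT = VECTYPE_IN = V4SF, the function above
   returns IFN_SQRT if the target provides the corresponding optab for
   V4SF and IFN_LAST otherwise.  */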

static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			  vec_load_store_type vls_type, int group_size,
			  vect_memory_access_type memory_access_type,
			  gather_scatter_info *gs_info)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      tree offset_type = TREE_TYPE (gs_info->offset);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   TYPE_SIGN (offset_type),
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because an access"
			 " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!(targetm.vectorize.get_mask_mode
	(GET_MODE_NUNITS (vecmode),
	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because the target"
			 " doesn't have the appropriate masked load or"
			 " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }
  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
  else
    gcc_unreachable ();
}
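
/* To illustrate the final computation above with invented numbers:
   a group of GROUP_SIZE = 3 accesses with VF = 4 and V8HI vectors
   (nunits = 8) needs ceil (3 * 4 / 8) = 2 mask vectors, so NVECTORS = 2
   masks of VECTYPE are recorded for the loop.  */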

/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
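
/* For example (SSA names invented for illustration), given VEC_MASK
   vec_mask_23 and LOOP_MASK loop_mask_5, this emits

     vec_mask_and_42 = vec_mask_23 & loop_mask_5;

   before GSI and returns vec_mask_and_42, so that a masked access only
   touches lanes that are active in both masks.  */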

/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT by truncating the current offset to a smaller
   width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
				     bool masked_p,
				     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr) };
  bool overflow_p = false;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* See whether we can calculate COUNT * STEP / SCALE
	 in ELEMENT_BITS bits.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
      if (overflow_p)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      if (wi::min_precision (range, sign) > element_bits)
	{
	  overflow_p = true;
	  continue;
	}

      /* See whether the target supports the operation.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
				     memory_type, element_bits, sign, scale,
				     &gs_info->ifn, &gs_info->element_type))
	continue;

      tree offset_type = build_nonstandard_integer_type (element_bits,
							 sign == UNSIGNED);

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->offset_vectype = NULL_TREE;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow_p && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
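
/* A worked example with invented numbers: for a strided access with
   DR_STEP = 20 bytes, 32-bit elements (ELEMENT_BITS = 32) and 4-byte
   scalars, the loop above tries SCALE = 1 (offsets 0, 20, 40, ...)
   and then SCALE = 4 (offsets 0, 5, 10, ...).  If the loop runs at
   most 100 scalar iterations, the largest offset 99 * 20 = 1980 needs
   far fewer than 32 bits, so the first scale that
   vect_gather_scatter_fn_p accepts is used.  */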

/* Return true if we can use gather/scatter internal functions to
   vectorize STMT, which is a grouped or strided load or store.
   MASKED_P is true if the load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
				    bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
						masked_p, gs_info);

  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int offset_bits = TYPE_PRECISION (offset_type);

  /* Enforced by vect_check_gather_scatter.  */
  gcc_assert (element_bits >= offset_bits);

  /* If the elements are wider than the offset, convert the offset to the
     same width, without changing its sign.  */
  if (element_bits > offset_bits)
    {
      bool unsigned_p = TYPE_UNSIGNED (offset_type);
      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
      gs_info->offset = fold_convert (offset_type, gs_info->offset);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}

/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr)->step,
			       size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
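
/* For illustration, with an assumed V8SI vector: the three encoded
   elements are { 7, 6, 5 } and extending the downward step gives the
   full series { 7, 6, 5, 4, 3, 2, 1, 0 }, so

     vect_r = VEC_PERM_EXPR <vect_x, vect_x, { 7, 6, ..., 0 }>;

   reverses the vector.  The same single-pattern encoding also
   describes the reversal of variable-length vectors.  */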

/* STMT is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (gimple *stmt)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (stmt, index);
    }
  gcc_unreachable ();
}
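
/* For example: for a plain store "*p_7 = x_5" this returns x_5, while
   for a masked store such as

     MASK_STORE (p_7, align, mask_8, x_5);

   internal_fn_stored_value_index locates x_5 among the call arguments
   (its exact position depends on the internal function's signature).  */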

/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
			   bool masked_p, vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type,
			   gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
			   && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
			&& vls_type == VLS_LOAD
			&& loop_vinfo
			&& !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
	{
	  /* Try to use consecutive accesses of GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (multiple_p (nunits, group_size))
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
	  if (overrun_p
	      && gap < (vect_known_alignment_in_bytes (first_dr)
			/ vect_get_scalar_dr_size (first_dr)))
	    overrun_p = false;
	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  *memory_access_type = VMAT_CONTIGUOUS;
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
      if (would_overrun_p
	  && !masked_p
	  && gap < (vect_known_alignment_in_bytes (first_dr)
		    / vect_get_scalar_dr_size (first_dr)))
	would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (stmt) > 0)
	{
	  /* First cope with the degenerate case of a single-element
	     vector.  */
	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
	    *memory_access_type = VMAT_CONTIGUOUS;

	  /* Otherwise try using LOAD/STORE_LANES.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_load_lanes_supported (vectype, group_size, masked_p)
		  : vect_store_lanes_supported (vectype, group_size,
						masked_p)))
	    {
	      *memory_access_type = VMAT_LOAD_STORE_LANES;
	      overrun_p = would_overrun_p;
	    }

	  /* If that fails, try using permuting loads.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_grouped_load_supported (vectype, single_element_p,
						 group_size)
		  : vect_grouped_store_supported (vectype, group_size)))
	    {
	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
	      overrun_p = would_overrun_p;
	    }
	}

      /* As a last resort, try using a gather load or scatter store.

	 ??? Although the code can handle all group sizes correctly,
	 it probably isn't a win to use separate strided accesses based
	 on nearby locations.  Or, even if it's a win over scalar code,
	 it might not be a win over vectorizing at a lower VF, if that
	 allows us to use contiguous accesses.  */
      if (*memory_access_type == VMAT_ELEMENTWISE
	  && single_element_p
	  && loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
	{
	  tree op = vect_get_store_rhs (next_stmt);
	  gimple *def_stmt;
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
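
/* A small worked example of the gap logic above (numbers invented):
   a load group with GROUP_SIZE = 2 and a gap of 1 reads elements 0 and
   1 out of every 3, so a full-width vector load would touch one
   element past the last group member.  If the known alignment covers
   at least 2 scalar elements, that extra element lies in the same
   aligned block and the overrun is harmless; otherwise it forces
   peeling for gaps, i.e. a scalar epilogue iteration.  */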

/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}

/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
		     vec_load_store_type vls_type, unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      gimple *def_stmt;
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
				      memory_access_type, gs_info))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      if (loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
      else
	*memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
	*memory_access_type = get_negative_load_store_type
	  (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
	{
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	}
      else
	*memory_access_type = VMAT_CONTIGUOUS;
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Not using elementwise accesses due to variable "
			 "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info)
      && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
	   && !GROUP_NEXT_ELEMENT (stmt_info)
	   && !pow2p_hwi (GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}

/* Return true if boolean argument MASK is suitable for vectorizing
   conditional load or store STMT.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

static bool
vect_check_load_store_mask (gimple *stmt, tree mask,
			    vect_def_type *mask_dt_out,
			    tree *mask_vectype_out)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not an SSA name.\n");
      return false;
    }

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple *def_stmt;
  enum vect_def_type mask_dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
			   &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vector mask type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION,
		       " does not match vector data type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
	}
      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  return true;
}

/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
		      tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple *def_stmt;
  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
			   &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}

/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (gimple *stmt, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
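
/* The floating-point case above exists because some gather builtins
   (e.g. the x86 AVX2 ones) take a mask vector of floating-point type
   whose element bit patterns, not values, select the active lanes:
   an element with all bits set enables its lane, so we build a real
   whose target representation is all-ones rather than the value -1.0.  */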

/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT as a gather load.  */

static tree
vect_build_zero_merge_argument (gimple *stmt, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt, merge, vectype, NULL);
}

/* Build a gather load call while vectorizing STMT.  Insert new instructions
   before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
   operation.  If the load is conditional, MASK is the unvectorized
   condition and MASK_DT is its definition type, otherwise MASK is null.  */

static void
vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, gather_scatter_info *gs_info,
			      tree mask, vect_def_type mask_dt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  gcc_checking_assert (types_compatible_p (srctype, rettype)
		       && (!mask || types_compatible_p (srctype, masktype)));

  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
					      indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
	sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask)
	{
	  for (int i = 0; i < count; ++i)
	    sel[i] = i | (count / 2);
	  indices.new_vector (sel, 2, count);
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
	}
    }
  else
    gcc_unreachable ();

  tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
					       vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;
  stmt_vec_info prev_stmt_info = NULL;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (stmt, rettype);
      mask_op = vect_build_all_ones_mask (stmt, masktype);
    }

  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      gimple *new_stmt;
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt, gsi);
      else if (j == 0)
	op = vec_oprnd0
	  = vect_get_vec_def_for_operand (gs_info->offset, stmt);
      else
	op = vec_oprnd0
	  = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt);
	      else
		vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert
		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
			       TYPE_VECTOR_SUBPARTS (masktype)));
		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
						  mask_op);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  mask_op = var;
		}
	    }
	  src_op = mask_op;
	}

      new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
				    mask_op, scale);

      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				TYPE_VECTOR_SUBPARTS (rettype)));
	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
	  gimple_call_set_lhs (new_stmt, op);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  var = make_ssa_name (vec_dest);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, var);
	}

      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (modifier == NARROW)
	{
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
	  new_stmt = SSA_NAME_DEF_STMT (var);
	}

      if (prev_stmt_info == NULL)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
}
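
/* To make the WIDEN/NARROW cases above concrete (types invented for
   illustration): V4DF data (nunits = 4) with a V8SI offset vector
   (gather_off_nunits = 8) gives modifier == WIDEN, and the permutation
   { 4, 5, 6, 7, 4, 5, 6, 7 } built from i | (count / 2) brings the
   upper half of the offsets into place for the odd-numbered copy.
   Conversely, V8SF data with a V4DI offset gives modifier == NARROW:
   each call produces half a result vector and the
   { 0, 1, 2, 3, 8, 9, 10, 11 } permutation glues two halves together.  */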

/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT.  STMT
   is the statement described by GS_INFO and LOOP is the containing loop.  */

static void
vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
			     gather_scatter_info *gs_info,
			     tree *dataref_ptr, tree *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
					      offset_vectype);
}

/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple_seq stmts;

  tree bump = size_binop (MULT_EXPR,
			  fold_convert (sizetype, DR_STEP (dr)),
			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  offset_type = TREE_TYPE (offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);

  /* Create {0, X, X*2, X*3, ...}.  */
  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
			      build_zero_cst (offset_type), step);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
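
/* A worked example with invented numbers: for a load with DR_STEP = 8
   bytes, SCALE = 4 and V4SI vectors, X = 8 / 4 = 2, so the VEC_SERIES
   gives *VEC_OFFSET = { 0, 2, 4, 6 } (scaled back to byte offsets
   0, 8, 16, 24 by the gather itself), while *DATAREF_BUMP = 8 * 4 = 32
   bytes advances the base past the elements accessed by one copy.  */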

/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
			     vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (dr)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
2990
37b14185
RB
2991/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2992
2993static bool
2994vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2995 gimple **vec_stmt, slp_tree slp_node,
68435eb2
RB
2996 tree vectype_in, enum vect_def_type *dt,
2997 stmt_vector_for_cost *cost_vec)
37b14185
RB
2998{
2999 tree op, vectype;
3000 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3001 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
3002 unsigned ncopies;
3003 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
3004
3005 op = gimple_call_arg (stmt, 0);
3006 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
3007
3008 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
3009 return false;
37b14185
RB
3010
3011 /* Multiple types in SLP are handled by creating the appropriate number of
3012 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3013 case of SLP. */
3014 if (slp_node)
3015 ncopies = 1;
3016 else
e8f142e2 3017 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
3018
3019 gcc_assert (ncopies >= 1);
3020
3021 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3022 if (! char_vectype)
3023 return false;
3024
928686b1
RS
3025 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3026 return false;
3027
794e3180 3028 unsigned word_bytes = num_bytes / nunits;
908a1a16 3029
d980067b
RS
3030 /* The encoding uses one stepped pattern for each byte in the word. */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
                         "\n");
      if (! slp_node)
        {
          record_stmt_cost (cost_vec,
                            1, vector_stmt, stmt_info, 0, vect_prologue);
          record_stmt_cost (cost_vec,
                            ncopies, vec_perm, stmt_info, 0, vect_body);
        }
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          tree tem = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       char_vectype, vop));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          tree tem2 = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
                                          tem, tem, bswap_vconst);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          tem = make_ssa_name (vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       vectype, tem2));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
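/* A single-step case on many targets is, e.g., V4SI -> V8HI, for which
   *CONVERT_CODE becomes VEC_PACK_TRUNC_EXPR; an int -> char narrowing
   needs two steps and is rejected here.  */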

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}

/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
          || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;
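  /* For instance, a call taking V4SI arguments and producing a V8HI result
     has nunits_in == 4 and nunits_out == 8, so it is classified as NARROW:
     two input vectors are consumed per output vector.  */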

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  combined_fn cfn = gimple_call_combined_fn (stmt);
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
        fndecl = targetm.vectorize.builtin_vectorized_function
          (cfn, vectype_out, vectype_in);
      else if (callee)
        fndecl = targetm.vectorize.builtin_md_vectorized_function
          (callee, vectype_out, vectype_in);
    }
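  /* So a call to, say, sqrtf in the scalar loop would typically be tried
     first as the target-independent IFN_SQRT on the chosen vector mode,
     and only if that lacks optab support would a target builtin be
     queried.  */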

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else if (modifier == NONE
               && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
                   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
                   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
        return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
                                   vectype_in, dt, cost_vec);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                         "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        record_stmt_cost (cost_vec, ncopies / 2,
                          vec_promote_demote, stmt_info, 0, vect_body);

      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
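                  /* For NARROW, each half-width call result is paired with
                     the next one: even-numbered results are stashed in
                     PREV_RES and the following odd-numbered result is packed
                     with it using CONVERT_CODE from
                     simple_integer_narrowing.  */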
                  if (modifier == NARROW)
                    {
                      tree half_res = make_ssa_name (vectype_in);
                      gcall *call
                        = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (call, half_res);
                      gimple_call_set_nothrow (call, true);
                      new_stmt = call;
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, convert_code,
                                                      prev_res, half_res);
                    }
                  else
                    {
                      gcall *call;
                      if (ifn != IFN_LAST)
                        call = gimple_build_call_internal_vec (ifn, vargs);
                      else
                        call = gimple_build_call_vec (fndecl, vargs);
                      new_temp = make_ssa_name (vec_dest, call);
                      gimple_call_set_lhs (call, new_temp);
                      gimple_call_set_nothrow (call, true);
                      new_stmt = call;
                    }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
          else if (modifier == NARROW)
            {
              tree half_res = make_ssa_name (vectype_in);
              gcall *call = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (call, half_res);
              gimple_call_set_nothrow (call, true);
              new_stmt = call;
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, convert_code,
                                              prev_res, half_res);
            }
          else
            {
              gcall *call;
              if (ifn != IFN_LAST)
                call = gimple_build_call_internal_vec (ifn, vargs);
              else
                call = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (call, new_temp);
              gimple_call_set_nothrow (call, true);
              new_stmt = call;
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else if (modifier == NARROW)
    {
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  gcall *call;
                  if (ifn != IFN_LAST)
                    call = gimple_build_call_internal_vec (ifn, vargs);
                  else
                    call = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, call);
                  gimple_call_set_lhs (call, new_temp);
                  gimple_call_set_nothrow (call, true);
                  new_stmt = call;
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */
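/* The pattern walked is, e.g., OP = &base + (_N * 4) where _N comes from a
   GOMP_SIMD_LANE call: the additions, multiplications and conversions
   between the lane number and the POINTER_PLUS_EXPR offset are peeled off
   to recover the invariant base and the per-lane LINEAR_STEP.  */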

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}

/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node,
                              stmt_vector_for_cost *)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
          thisarginfo.simd_lane_linear
            = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
               == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
          && !thisarginfo.linear_step
          && !vec_stmt
          && thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && !slp_node
          && TREE_CODE (op) == SSA_NAME)
        vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not considering SIMD clones; not yet supported"
                         " for variable-width vectors.\n");
      return false;
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen > vf
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen < vf)
          this_badness += (exact_log2 (vf)
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                       (n->simdclone->args[i].orig_type,
                        TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }
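  /* So with vf == 8, a simdlen-4 clone scores 1024 (one halving of the
     vectorization factor) while a matching simdlen-8 clone scores 0 and
     wins; an inbranch clone would pay a further 2048 on top of any
     per-argument penalties.  */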

  if (bestn == NULL)
    return false;

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (simd_clone_subparts (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if ((bestn->simdclone->args[i].arg_type
             == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
            || (bestn->simdclone->args[i].arg_type
                == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
            tree sll = arginfo[i].simd_lane_linear
                       ? boolean_true_node : boolean_false_node;
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / simd_clone_subparts (atype);
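              /* E.g. a simdlen-8 clone taking a V8SI argument from a loop
                 vectorized with V4SI vectors has o == 1 and k == 2: two
                 V4SI defs are concatenated into one V8SI argument.  In the
                 opposite situation a wider def is split up with
                 BIT_FIELD_REFs instead.  */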
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (simd_clone_subparts (atype)
                      < simd_clone_subparts (arginfo[i].vectype))
                    {
                      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (simd_clone_subparts (arginfo[i].vectype)
                           / simd_clone_subparts (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  bitsize_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (simd_clone_subparts (atype)
                           / simd_clone_subparts (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  if (arginfo[i].simd_lane_linear)
                    {
                      vargs.safe_push (arginfo[i].op);
                      break;
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (simd_clone_subparts (vectype)
                   == simd_clone_subparts (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (simd_clone_subparts (vectype) < nunits)
            {
              unsigned int k, l;
              poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
              k = nunits / simd_clone_subparts (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t), l * bytes));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                bitsize_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                vect_clobber_variable (stmt, gsi, new_temp);
              continue;
            }
          else if (simd_clone_subparts (vectype) > nunits)
            {
              unsigned int k = (simd_clone_subparts (vectype)
                                / simd_clone_subparts (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / simd_clone_subparts (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  vect_clobber_variable (stmt, gsi, new_temp);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              vect_clobber_variable (stmt, gsi, new_temp);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
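/* For example, a two-step int -> char demotion with V4SI inputs first packs
   pairs of V4SI into V8HI, then pairs of V8HI into V16QI, halving the
   number of vectors at each level of recursion.  */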

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple *stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */
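/* Typically CODE1/CODE2 are a lo/hi pair such as VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR chosen by supportable_widening_operation, so each
   V8HI operand yields a lo and a hi V4SI result, doubling the number of
   vectors.  */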

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple *stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}


/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4584
4585static bool
355fe088 4586vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
	   && !type_has_mode_precision_p (lhs_type))
	  || (INTEGRAL_TYPE_P (rhs_type)
	      && !type_has_mode_precision_p (rhs_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "can't convert between boolean and non "
			   "boolean vectors");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (multiple_p (nunits_out, nunits_in))
    modifier = NARROW;
  else
    {
      gcc_checking_assert (multiple_p (nunits_in, nunits_out));
      modifier = WIDEN;
    }
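
  /* Added illustration (concrete modes are an assumption about a target
     with 128-bit vectors, not from the original sources): a long -> int
     conversion has vectype_in V2DI (nunits_in = 2) and vectype_out V4SI
     (nunits_out = 4), so nunits_out is a multiple of nunits_in and
     MODIFIER is NARROW; the reverse int -> long conversion gives WIDEN;
     float -> int keeps the element count equal and gives NONE.  */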

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

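  /* Added illustration (assumed 128-bit vectors, not from the original
     sources): for a WIDEN int -> long conversion with VF = 8, vectype_in
     is V4SI and vect_get_num_copies returns 8 / 4 = 2; each copy consumes
     one V4SI input and produces two V2DI results.  For NARROW the count
     is based on vectype_out instead, because one output vector consumes
     several input vectors.  */
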
  bool found_mode = false;
  scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
  scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
  opt_scalar_mode rhs_mode_iter;

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      fltsz = GET_MODE_SIZE (lhs_mode);
      FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
	{
	  rhs_mode = rhs_mode_iter.require ();
	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
	    break;

	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    {
	      found_mode = true;
	      break;
	    }
	}

      if (!found_mode)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
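
  /* Added illustration (hypothetical target support, not from the
     original sources): a short -> double FLOAT_EXPR is typically not
     supported directly, so the WIDEN arm above searches 2x-wider integer
     modes.  With rhs_mode = SImode it may find a supportable
     int -> double widening float conversion plus a supportable
     short -> int widening NOP, recording the extra int stage in
     INTERM_TYPES / MULTI_STEP_CVT; the emitted sequence then widens
     short to int first and converts int to double second.  */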

  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
				  cost_vec);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
					      cost_vec);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
					      cost_vec);
	}
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}
      break;

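      /* Added illustration (assumed target support for V4SF/V4SI
	 conversions, not from the original sources): in the NONE case a
	 scalar
	     _2 = (int) _1;
	 becomes one statement per copy, either
	     vect__2 = (vector(4) int) vect__1;
	 when CODE1 is a tree code, or a call to the target builtin DECL1
	 when supportable_convert_operation returned CODE1 == CALL_EXPR.  */
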
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e. - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,
						      vop0);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e. - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,
						    vop0);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();

  return true;
}


/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

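  /* Added illustration (not from the original sources): an int ->
     unsigned int copy keeps both the element count and the vector size,
     so it is handled here as a plain vector assignment (via the
     VIEW_CONVERT_EXPR built below); an int -> long conversion changes
     the element count and is rejected above, leaving it to
     vectorizable_conversion.  */
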
  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}


/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{

  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}


/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, we need to check whether the shift count is the same;
	 in loops, a constant or invariant shift count is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple *slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (dt[1] == vect_internal_def)
	{
	  gimple *def = SSA_NAME_DEF_STMT (op1);
	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
	    scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }

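  /* Added illustration (not from the original sources): for a loop with
	 x[i] = y[i] << 3;
     dt[1] is vect_constant_def and SCALAR_SHIFT_ARG stays true, so the
     scalar count 3 can feed a vector/scalar shift optab directly.  For
	 x[i] = y[i] << z[i];
     dt[1] is vect_internal_def and SCALAR_SHIFT_ARG becomes false, which
     requires a vector/vector shift optab instead.  */
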
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
						   TREE_TYPE (op1)))
		{
		  if (slp_node
		      && TYPE_MODE (TREE_TYPE (vectype))
			 != TYPE_MODE (TREE_TYPE (op1)))
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "unusable type for last operand in"
					 " vector/vector shift/rotate.\n");
		      return false;
		    }
		  if (vec_stmt && !slp_node)
		    {
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
		    }
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt
	      && !vect_worthwhile_without_simd_p (vinfo, code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!vec_stmt
      && !VECTOR_MODE_P (TYPE_MODE (vectype))
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

5666
67b8dbac 5667 /* Transform. */
9dc3f7de 5668
73fbfcad 5669 if (dump_enabled_p ())
78c60e3d 5670 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5671 "transform binary/unary operation.\n");
9dc3f7de
IR
5672
5673 /* Handle def. */
5674 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5675
9dc3f7de
IR
5676 prev_stmt_info = NULL;
5677 for (j = 0; j < ncopies; j++)
5678 {
5679 /* Handle uses. */
5680 if (j == 0)
5681 {
5682 if (scalar_shift_arg)
5683 {
5684 /* Vector shl and shr insn patterns can be defined with scalar
5685 operand 2 (shift operand). In this case, use constant or loop
5686 invariant op1 directly, without extending it to vector mode
5687 first. */
5688 optab_op2_mode = insn_data[icode].operand[2].mode;
5689 if (!VECTOR_MODE_P (optab_op2_mode))
5690 {
73fbfcad 5691 if (dump_enabled_p ())
78c60e3d 5692 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5693 "operand 1 using scalar mode.\n");
9dc3f7de 5694 vec_oprnd1 = op1;
8930f723 5695 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5696 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5697 if (slp_node)
5698 {
5699 /* Store vec_oprnd1 for every vector stmt to be created
5700 for SLP_NODE. We check during the analysis that all
5701 the shift arguments are the same.
5702 TODO: Allow different constants for different vector
5703 stmts generated for an SLP instance. */
5704 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5705 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5706 }
5707 }
5708 }
5709
5710 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5711 (a special case for certain kind of vector shifts); otherwise,
5712 operand 1 should be of a vector type (the usual case). */
5713 if (vec_oprnd1)
5714 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5715 slp_node);
9dc3f7de
IR
5716 else
5717 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5718 slp_node);
9dc3f7de
IR
5719 }
5720 else
5721 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5722
5723 /* Arguments are ready. Create the new vector stmt. */
9771b263 5724 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5725 {
9771b263 5726 vop1 = vec_oprnds1[i];
0d0e4a03 5727 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5728 new_temp = make_ssa_name (vec_dest, new_stmt);
5729 gimple_assign_set_lhs (new_stmt, new_temp);
5730 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5731 if (slp_node)
9771b263 5732 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5733 }
5734
5735 if (slp_node)
5736 continue;
5737
5738 if (j == 0)
5739 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5740 else
5741 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5742 prev_stmt_info = vinfo_for_stmt (new_stmt);
5743 }
5744
9771b263
DN
5745 vec_oprnds0.release ();
5746 vec_oprnds1.release ();
9dc3f7de
IR
5747
5748 return true;
5749}
5750
5751
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exceptions are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

ebfd146a 5898
16949072 5899 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5900 {
5901 op1 = gimple_assign_rhs2 (stmt);
81c40241 5902 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5903 {
73fbfcad 5904 if (dump_enabled_p ())
78c60e3d 5905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5906 "use not simple.\n");
ebfd146a
IR
5907 return false;
5908 }
5909 }
16949072
RG
5910 if (op_type == ternary_op)
5911 {
5912 op2 = gimple_assign_rhs3 (stmt);
81c40241 5913 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5914 {
73fbfcad 5915 if (dump_enabled_p ())
78c60e3d 5916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5917 "use not simple.\n");
16949072
RG
5918 return false;
5919 }
5920 }
ebfd146a 5921
b690cc0f 5922 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5923 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5924 case of SLP. */
fce57248 5925 if (slp_node)
b690cc0f
RG
5926 ncopies = 1;
5927 else
e8f142e2 5928 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5929
5930 gcc_assert (ncopies >= 1);
5931
9dc3f7de 5932 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5933 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5934 || code == RROTATE_EXPR)
9dc3f7de 5935 return false;
ebfd146a 5936
ebfd146a 5937 /* Supportable by target? */
00f07b86
RH
5938
5939 vec_mode = TYPE_MODE (vectype);
5940 if (code == MULT_HIGHPART_EXPR)
523ba738 5941 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5942 else
5943 {
5944 optab = optab_for_tree_code (code, vectype, optab_default);
5945 if (!optab)
5deb57cb 5946 {
73fbfcad 5947 if (dump_enabled_p ())
78c60e3d 5948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5949 "no optab.\n");
00f07b86 5950 return false;
5deb57cb 5951 }
523ba738
RS
5952 target_support_p = (optab_handler (optab, vec_mode)
5953 != CODE_FOR_nothing);
5deb57cb
JJ
5954 }
5955
523ba738 5956 if (!target_support_p)
ebfd146a 5957 {
73fbfcad 5958 if (dump_enabled_p ())
78c60e3d 5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5960 "op not supported by target.\n");
ebfd146a 5961 /* Check only during analysis. */
cf098191 5962 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5963 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5964 return false;
73fbfcad 5965 if (dump_enabled_p ())
e645e942
TJ
5966 dump_printf_loc (MSG_NOTE, vect_location,
5967 "proceeding using word mode.\n");
383d9c83
IR
5968 }
5969
4a00c761 5970 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5971 if (!VECTOR_MODE_P (vec_mode)
5972 && !vec_stmt
ca09abcb 5973 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5974 {
73fbfcad 5975 if (dump_enabled_p ())
78c60e3d 5976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5977 "not worthwhile without SIMD support.\n");
e34842c6 5978 return false;
7d8930a0 5979 }
ebfd146a 5980
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);

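  /* Added illustration (element width and count are an assumption about
     the target, not from the original sources): for a scalar
	 _3 = p_1 - q_2;
     where _3 has POINTER_DIFF_EXPR semantics on long pointers, each copy
     emits roughly
	 vect_tmp = vect_p - vect_q;
	 vect__3 = VIEW_CONVERT_EXPR<vector(2) long>(vect_tmp);
     with the MINUS_EXPR performed on vectors of unsigned elements and
     the VIEW_CONVERT_EXPR producing the signed result.  */
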
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	  else if (op_type == ternary_op)
	    {
	      if (slp_node)
		{
		  auto_vec<tree> ops(3);
		  ops.quick_push (op0);
		  ops.quick_push (op1);
		  ops.quick_push (op2);
		  auto_vec<vec<tree> > vec_defs(3);
		  vect_get_slp_defs (ops, slp_node, &vec_defs);
		  vec_oprnds0 = vec_defs[0];
		  vec_oprnds1 = vec_defs[1];
		  vec_oprnds2 = vec_defs[2];
		}
	      else
		{
		  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				     &vec_oprnds1, NULL);
		  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2,
				     NULL, NULL);
		}
	    }
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (vec_cvt_dest)
	    {
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					      new_temp);
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment.  */

static void
ensure_base_align (struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}


/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT.  */

static tree
get_group_alias_ptr_type (gimple *first_stmt)
{
  struct data_reference *first_dr, *next_dr;
  gimple *next_stmt;

  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
  while (next_stmt)
    {
      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}

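/* Added illustration (hypothetical example, not from the original
   sources): for a grouped store that writes both an int field and a
   float field of the same struct, the two DR_REFs have different alias
   sets, so get_group_alias_ptr_type returns ptr_type_node and the
   generated vector MEM_REF conservatively uses alias set zero.  */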

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		    slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  gimple *def_stmt;
  enum vect_def_type rhs_dt = vect_unknown_def_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  gimple *next_stmt, *first_stmt;
  bool grouped_store;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree aggr_type;
  gather_scatter_info gs_info;
  gimple *new_stmt;
  poly_uint64 vf;
  vec_load_store_type vls_type;
  tree ref_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable store?  */

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (is_gimple_assign (stmt))
    {
      tree scalar_dest = gimple_assign_lhs (stmt);
      if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
	  && is_pattern_stmt_p (stmt_info))
	scalar_dest = TREE_OPERAND (scalar_dest, 0);
      if (TREE_CODE (scalar_dest) != ARRAY_REF
	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
	  && TREE_CODE (scalar_dest) != INDIRECT_REF
	  && TREE_CODE (scalar_dest) != COMPONENT_REF
	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
	  && TREE_CODE (scalar_dest) != REALPART_EXPR
	  && TREE_CODE (scalar_dest) != MEM_REF)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt);
      if (!call || !gimple_call_internal_p (call))
	return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_store_fn_p (ifn))
	return false;

c3a8f964
RS
6289 if (slp_node != NULL)
6290 {
6291 if (dump_enabled_p ())
6292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6293 "SLP of masked stores not supported.\n");
6294 return false;
6295 }
6296
f307441a
RS
6297 int mask_index = internal_fn_mask_index (ifn);
6298 if (mask_index >= 0)
6299 {
6300 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6301 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6302 &mask_vectype))
f307441a
RS
6303 return false;
6304 }
c3a8f964
RS
6305 }
6306
6307 op = vect_get_store_rhs (stmt);
ebfd146a 6308
fce57248
RS
6309 /* Cannot have hybrid store SLP -- that would mean storing to the
6310 same location twice. */
6311 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6312
f4d09712 6313 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6314 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6315
6316 if (loop_vinfo)
b17dc4d4
RB
6317 {
6318 loop = LOOP_VINFO_LOOP (loop_vinfo);
6319 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6320 }
6321 else
6322 vf = 1;
465c8c19
JJ
6323
6324 /* Multiple types in SLP are handled by creating the appropriate number of
6325 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6326 case of SLP. */
fce57248 6327 if (slp)
465c8c19
JJ
6328 ncopies = 1;
6329 else
e8f142e2 6330 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6331
6332 gcc_assert (ncopies >= 1);
6333
6334 /* FORNOW. This restriction should be relaxed. */
6335 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6336 {
6337 if (dump_enabled_p ())
6338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6339 "multiple types in nested loop.\n");
6340 return false;
6341 }
6342
929b4411 6343 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6344 return false;
6345
272c6793 6346 elem_type = TREE_TYPE (vectype);
ebfd146a 6347 vec_mode = TYPE_MODE (vectype);
7b7b1813 6348
ebfd146a
IR
6349 if (!STMT_VINFO_DATA_REF (stmt_info))
6350 return false;
6351
2de001ee 6352 vect_memory_access_type memory_access_type;
7e11fc7f 6353 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6354 &memory_access_type, &gs_info))
6355 return false;
3bab6342 6356
c3a8f964
RS
6357 if (mask)
6358 {
7e11fc7f
RS
6359 if (memory_access_type == VMAT_CONTIGUOUS)
6360 {
6361 if (!VECTOR_MODE_P (vec_mode)
6362 || !can_vec_mask_load_store_p (vec_mode,
6363 TYPE_MODE (mask_vectype), false))
6364 return false;
6365 }
f307441a
RS
6366 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6367 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6368 {
6369 if (dump_enabled_p ())
6370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6371 "unsupported access type for masked store.\n");
6372 return false;
6373 }
c3a8f964
RS
6374 }
6375 else
6376 {
6377 /* FORNOW. In some cases can vectorize even if data-type not supported
6378 (e.g. - array initialization with 0). */
6379 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6380 return false;
6381 }
6382
f307441a 6383 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6384 && memory_access_type != VMAT_GATHER_SCATTER
6385 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6386 if (grouped_store)
6387 {
6388 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6389 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6390 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6391 }
6392 else
6393 {
6394 first_stmt = stmt;
6395 first_dr = dr;
6396 group_size = vec_num = 1;
6397 }
6398
ebfd146a
IR
6399 if (!vec_stmt) /* transformation not required. */
6400 {
2de001ee 6401 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6402
6403 if (loop_vinfo
6404 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6405 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6406 memory_access_type, &gs_info);
7cfb4d93 6407
ebfd146a 6408 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6409 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6410 vls_type, slp_node, cost_vec);
ebfd146a
IR
6411 return true;
6412 }
2de001ee 6413 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6414
67b8dbac 6415 /* Transform. */
ebfd146a 6416
f702e7d4 6417 ensure_base_align (dr);
c716e67f 6418
f307441a 6419 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6420 {
c3a8f964 6421 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6422 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6423 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6424 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6425 edge pe = loop_preheader_edge (loop);
6426 gimple_seq seq;
6427 basic_block new_bb;
6428 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6429 poly_uint64 scatter_off_nunits
6430 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6431
4d694b27 6432 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6433 modifier = NONE;
4d694b27 6434 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6435 {
3bab6342
AT
6436 modifier = WIDEN;
6437
4d694b27
RS
6438 /* Currently gathers and scatters are only supported for
6439 fixed-length vectors. */
6440 unsigned int count = scatter_off_nunits.to_constant ();
6441 vec_perm_builder sel (count, count, 1);
6442 for (i = 0; i < (unsigned int) count; ++i)
6443 sel.quick_push (i | (count / 2));
3bab6342 6444
4d694b27 6445 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6446 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6447 indices);
3bab6342
AT
6448 gcc_assert (perm_mask != NULL_TREE);
6449 }
4d694b27 6450 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6451 {
3bab6342
AT
6452 modifier = NARROW;
6453
4d694b27
RS
6454 /* Currently gathers and scatters are only supported for
6455 fixed-length vectors. */
6456 unsigned int count = nunits.to_constant ();
6457 vec_perm_builder sel (count, count, 1);
6458 for (i = 0; i < (unsigned int) count; ++i)
6459 sel.quick_push (i | (count / 2));
3bab6342 6460
4d694b27 6461 vec_perm_indices indices (sel, 2, count);
e3342de4 6462 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6463 gcc_assert (perm_mask != NULL_TREE);
6464 ncopies *= 2;
6465 }
6466 else
6467 gcc_unreachable ();
6468
134c85ca 6469 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6470 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6471 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6472 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6473 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6474 scaletype = TREE_VALUE (arglist);
6475
6476 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6477 && TREE_CODE (rettype) == VOID_TYPE);
6478
134c85ca 6479 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6480 if (!is_gimple_min_invariant (ptr))
6481 {
6482 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6483 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6484 gcc_assert (!new_bb);
6485 }
6486
6487 /* Currently we support only unconditional scatter stores,
6488 so mask should be all ones. */
6489 mask = build_int_cst (masktype, -1);
6490 mask = vect_init_vector (stmt, mask, masktype, NULL);
6491
134c85ca 6492 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6493
6494 prev_stmt_info = NULL;
6495 for (j = 0; j < ncopies; ++j)
6496 {
6497 if (j == 0)
6498 {
6499 src = vec_oprnd1
c3a8f964 6500 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6501 op = vec_oprnd0
134c85ca 6502 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6503 }
6504 else if (modifier != NONE && (j & 1))
6505 {
6506 if (modifier == WIDEN)
6507 {
6508 src = vec_oprnd1
929b4411 6509 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6510 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6511 stmt, gsi);
6512 }
6513 else if (modifier == NARROW)
6514 {
6515 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6516 stmt, gsi);
6517 op = vec_oprnd0
134c85ca
RS
6518 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6519 vec_oprnd0);
3bab6342
AT
6520 }
6521 else
6522 gcc_unreachable ();
6523 }
6524 else
6525 {
6526 src = vec_oprnd1
929b4411 6527 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6528 op = vec_oprnd0
134c85ca
RS
6529 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6530 vec_oprnd0);
3bab6342
AT
6531 }
6532
6533 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6534 {
928686b1
RS
6535 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6536 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6537 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6538 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6539 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6541 src = var;
6542 }
6543
6544 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6545 {
928686b1
RS
6546 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6547 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6548 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6549 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6550 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6551 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6552 op = var;
6553 }
6554
6555 new_stmt
134c85ca 6556 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6557
6558 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6559
6560 if (prev_stmt_info == NULL)
6561 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6562 else
6563 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6564 prev_stmt_info = vinfo_for_stmt (new_stmt);
6565 }
6566 return true;
6567 }
6568
f307441a 6569 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6570 {
f307441a
RS
6571 gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6572 GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6573 }
ebfd146a 6574
f307441a
RS
6575 if (grouped_store)
6576 {
ebfd146a 6577 /* FORNOW */
a70d6342 6578 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6579
6580 /* We vectorize all the stmts of the interleaving group when we
6581 reach the last stmt in the group. */
e14c1050
IR
6582 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6583 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6584 && !slp)
6585 {
6586 *vec_stmt = NULL;
6587 return true;
6588 }
6589
6590 if (slp)
4b5caab7 6591 {
0d0293ac 6592 grouped_store = false;
4b5caab7
IR
6593 /* VEC_NUM is the number of vect stmts to be created for this
6594 group. */
6595 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6596 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 6597 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6598 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6599 op = vect_get_store_rhs (first_stmt);
4b5caab7 6600 }
ebfd146a 6601 else
4b5caab7
IR
6602 /* VEC_NUM is the number of vect stmts to be created for this
6603 group. */
ebfd146a 6604 vec_num = group_size;
44fc7854
BE
6605
6606 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6607 }
b8698a0f 6608 else
7cfb4d93 6609 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6610
73fbfcad 6611 if (dump_enabled_p ())
78c60e3d 6612 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6613 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6614
2de001ee
RS
6615 if (memory_access_type == VMAT_ELEMENTWISE
6616 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6617 {
6618 gimple_stmt_iterator incr_gsi;
6619 bool insert_after;
355fe088 6620 gimple *incr;
f2e2a985
MM
6621 tree offvar;
6622 tree ivstep;
6623 tree running_off;
f2e2a985
MM
6624 tree stride_base, stride_step, alias_off;
6625 tree vec_oprnd;
f502d50e 6626 unsigned int g;
4d694b27
RS
6627 /* Checked by get_load_store_type. */
6628 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6629
7cfb4d93 6630 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6631 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6632
6633 stride_base
6634 = fold_build_pointer_plus
b210f45f 6635 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6636 size_binop (PLUS_EXPR,
b210f45f 6637 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6638 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6639 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6640
6641 /* For a store with loop-invariant (but other than power-of-2)
6642 stride (i.e. not a grouped access) like so:
6643
6644 for (i = 0; i < n; i += stride)
6645 array[i] = ...;
6646
6647 we generate a new induction variable and new stores from
6648 the components of the (vectorized) rhs:
6649
6650 for (j = 0; ; j += VF*stride)
6651 vectemp = ...;
6652 tmp1 = vectemp[0];
6653 array[j] = tmp1;
6654 tmp2 = vectemp[1];
6655 array[j + stride] = tmp2;
6656 ...
6657 */
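
      /* As a concrete illustration (sketch, not generated code): with a
         V4SI vectype and stride == 3, one copy of the vectorized body
         emits stores of vectemp[0..3] to array[j], array[j + 3],
         array[j + 6] and array[j + 9], and the induction variable
         created below advances j by VF * 3 for the next vector
         iteration.  */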

      unsigned nstores = const_nunits;
      unsigned lnel = 1;
      tree ltype = elem_type;
      tree lvectype = vectype;
      if (slp)
        {
          if (group_size < const_nunits
              && const_nunits % group_size == 0)
            {
              nstores = const_nunits / group_size;
              lnel = group_size;
              ltype = build_vector_type (elem_type, group_size);
              lvectype = vectype;

              /* First check if vec_extract optab doesn't support extraction
                 of vector elts directly.  */
              scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
              machine_mode vmode;
              if (!mode_for_vector (elmode, group_size).exists (&vmode)
                  || !VECTOR_MODE_P (vmode)
                  || !targetm.vector_mode_supported_p (vmode)
                  || (convert_optab_handler (vec_extract_optab,
                                             TYPE_MODE (vectype), vmode)
                      == CODE_FOR_nothing))
                {
                  /* Try to avoid emitting an extract of vector elements
                     by performing the extracts using an integer type of the
                     same size, extracting from a vector of those and then
                     re-interpreting it as the original vector type if
                     supported.  */
                  unsigned lsize
                    = group_size * GET_MODE_BITSIZE (elmode);
                  elmode = int_mode_for_size (lsize, 0).require ();
                  unsigned int lnunits = const_nunits / group_size;
                  /* If we can't construct such a vector fall back to
                     element extracts from the original vector type and
                     element size stores.  */
                  if (mode_for_vector (elmode, lnunits).exists (&vmode)
                      && VECTOR_MODE_P (vmode)
                      && targetm.vector_mode_supported_p (vmode)
                      && (convert_optab_handler (vec_extract_optab,
                                                 vmode, elmode)
                          != CODE_FOR_nothing))
                    {
                      nstores = lnunits;
                      lnel = group_size;
                      ltype = build_nonstandard_integer_type (lsize, 1);
                      lvectype = build_vector_type (ltype, nstores);
                    }
                  /* Else fall back to vector extraction anyway.
                     Fewer stores are more important than avoiding spilling
                     of the vector we extract from.  Compared to the
                     construction case in vectorizable_load no store-forwarding
                     issue exists here for reasonable archs.  */
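                  /* Illustrative sketch: for group_size == 2 stores of
                     SImode elements taken from a V4SI vector, LSIZE is 64,
                     so if V2SI extraction is unsupported but DImode is
                     available, the vector is viewed as V2DI and each of
                     the two DImode lanes is written with a single
                     store.  */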
                }
            }
          else if (group_size >= const_nunits
                   && group_size % const_nunits == 0)
            {
              nstores = 1;
              lnel = const_nunits;
              ltype = vectype;
              lvectype = vectype;
            }
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);

      prev_stmt_info = NULL;
      alias_off = build_int_cst (ref_type, 0);
      next_stmt = first_stmt;
      for (g = 0; g < group_size; g++)
        {
          running_off = offvar;
          if (g)
            {
              tree size = TYPE_SIZE_UNIT (ltype);
              tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
                                      size);
              tree newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, pos);
              vect_finish_stmt_generation (stmt, incr, gsi);
              running_off = newoff;
            }
          unsigned int group_el = 0;
          unsigned HOST_WIDE_INT
            elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
          for (j = 0; j < ncopies; j++)
            {
              /* We've set op and dt above, from vect_get_store_rhs,
                 and first_stmt == stmt.  */
              if (j == 0)
                {
                  if (slp)
                    {
                      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                         NULL, slp_node);
                      vec_oprnd = vec_oprnds[0];
                    }
                  else
                    {
                      op = vect_get_store_rhs (next_stmt);
                      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
                    }
                }
              else
                {
                  if (slp)
                    vec_oprnd = vec_oprnds[j];
                  else
                    {
                      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
                      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
                                                                  vec_oprnd);
                    }
                }
              /* Pun the vector to extract from if necessary.  */
              if (lvectype != vectype)
                {
                  tree tem = make_ssa_name (lvectype);
                  gimple *pun
                    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                        lvectype, vec_oprnd));
                  vect_finish_stmt_generation (stmt, pun, gsi);
                  vec_oprnd = tem;
                }
              for (i = 0; i < nstores; i++)
                {
                  tree newref, newoff;
                  gimple *incr, *assign;
                  tree size = TYPE_SIZE (ltype);
                  /* Extract the i'th component.  */
                  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
                                          bitsize_int (i), size);
                  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
                                           size, pos);

                  elem = force_gimple_operand_gsi (gsi, elem, true,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);

                  tree this_off = build_int_cst (TREE_TYPE (alias_off),
                                                 group_el * elsz);
                  newref = build2 (MEM_REF, ltype,
                                   running_off, this_off);
                  vect_copy_ref_info (newref, DR_REF (first_dr));

                  /* And store it to *running_off.  */
                  assign = gimple_build_assign (newref, elem);
                  vect_finish_stmt_generation (stmt, assign, gsi);

                  group_el += lnel;
                  if (! slp
                      || group_el == group_size)
                    {
                      newoff = copy_ssa_name (running_off, NULL);
                      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                  running_off, stride_step);
                      vect_finish_stmt_generation (stmt, incr, gsi);

                      running_off = newoff;
                      group_el = 0;
                    }
                  if (g == group_size - 1
                      && !slp)
                    {
                      if (j == 0 && i == 0)
                        STMT_VINFO_VEC_STMT (stmt_info)
                          = *vec_stmt = assign;
                      else
                        STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
                      prev_stmt_info = vinfo_for_stmt (assign);
                    }
                }
            }
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          if (slp)
            break;
        }

      vec_oprnds.release ();
      return true;
    }

  auto_vec<tree> dr_chain (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
               && !mask
               && !loop_masks)
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (memory_access_type == VMAT_CONTIGUOUS_DOWN
      || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
                                       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
        aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
        aggr_type = vectype;
      bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
    }

  if (mask)
    LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
  */
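  /* Note for the example above: the selector of a VEC_PERM_EXPR indexes
     the concatenation of its two input vectors, so with 8-element vectors
     values 0..7 in VS5/VS6 select elements of vx0 and values 8..15 select
     elements of vx3; VS5 thus interleaves the low halves of the two
     vectors and VS6 the high halves.  */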

  prev_stmt_info = NULL;
  tree vec_mask = NULL_TREE;
  for (j = 0; j < ncopies; j++)
    {

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all
                 the stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN
                 is then used as an input to vect_permute_store_chain(), and
                 OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
                 the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  op = vect_get_store_rhs (next_stmt);
                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
              if (mask)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt,
                                                         mask_vectype);
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (TREE_TYPE (ref_type))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (ref_type, 0);
              inv_p = false;
            }
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
            {
              vect_get_gather_scatter_ops (loop, stmt, &gs_info,
                                           &dataref_ptr, &vec_offset);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p,
                                          NULL_TREE, bump);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (mask)
            vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset, bump);
          else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
            vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
                                                         vec_offset);
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           bump);
        }

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
        {
          tree vec_array;

          /* Get an array into which we can store the individual vectors.  */
          vec_array = create_vector_array (vectype, vec_num);

          /* Invalidate the current contents of VEC_ARRAY.  This should
             become an RTL clobber too, which prevents the vector registers
             from being upward-exposed.  */
          vect_clobber_variable (stmt, gsi, vec_array);

          /* Store the individual vectors into the array.  */
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          tree final_mask = NULL;
          if (loop_masks)
            final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
                                             vectype, j);
          if (vec_mask)
            final_mask = prepare_load_store_mask (mask_vectype, final_mask,
                                                  vec_mask, gsi);

          gcall *call;
          if (final_mask)
            {
              /* Emit:
                   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
                                     VEC_ARRAY).  */
              unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
              tree alias_ptr = build_int_cst (ref_type, align);
              call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
                                                 dataref_ptr, alias_ptr,
                                                 final_mask, vec_array);
            }
          else
            {
              /* Emit:
                   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
              data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
              call = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
              gimple_call_set_lhs (call, data_ref);
            }
          gimple_call_set_nothrow (call, true);
          new_stmt = call;
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Record that VEC_ARRAY is now dead.  */
          vect_clobber_variable (stmt, gsi, vec_array);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              tree final_mask = NULL_TREE;
              if (loop_masks)
                final_mask = vect_get_loop_mask (gsi, loop_masks,
                                                 vec_num * ncopies,
                                                 vectype, vec_num * j + i);
              if (vec_mask)
                final_mask = prepare_load_store_mask (mask_vectype,
                                                      final_mask,
                                                      vec_mask, gsi);

              if (memory_access_type == VMAT_GATHER_SCATTER)
                {
                  tree scale = size_int (gs_info.scale);
                  gcall *call;
                  if (loop_masks)
                    call = gimple_build_call_internal
                      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
                       scale, vec_oprnd, final_mask);
                  else
                    call = gimple_build_call_internal
                      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
                       scale, vec_oprnd);
                  gimple_call_set_nothrow (call, true);
                  new_stmt = call;
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  break;
                }

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, bump);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              align = DR_TARGET_ALIGNMENT (first_dr);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  align = dr_alignment (vect_dr_behavior (first_dr));
                  misalign = 0;
                }
              else
                misalign = DR_MISALIGNMENT (first_dr);
              if (dataref_offset == NULL_TREE
                  && TREE_CODE (dataref_ptr) == SSA_NAME)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (vect_get_store_rhs (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest);

                  /* Generate the permute statement.  */
                  gimple *perm_stmt
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              if (final_mask)
                {
                  align = least_bit_hwi (misalign | align);
                  tree ptr = build_int_cst (ref_type, align);
                  gcall *call
                    = gimple_build_call_internal (IFN_MASK_STORE, 4,
                                                  dataref_ptr, ptr,
                                                  final_mask, vec_oprnd);
                  gimple_call_set_nothrow (call, true);
                  new_stmt = call;
                }
              else
                {
                  data_ref = fold_build2 (MEM_REF, vectype,
                                          dataref_ptr,
                                          dataref_offset
                                          ? dataref_offset
                                          : build_int_cst (ref_type, 0));
                  if (aligned_access_p (first_dr))
                    ;
                  else if (DR_MISALIGNMENT (first_dr) == -1)
                    TREE_TYPE (data_ref)
                      = build_aligned_type (TREE_TYPE (data_ref),
                                            align * BITS_PER_UNIT);
                  else
                    TREE_TYPE (data_ref)
                      = build_aligned_type (TREE_TYPE (data_ref),
                                            TYPE_ALIGN (elem_type));
                  vect_copy_ref_info (data_ref, DR_REF (first_dr));
                  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
                }
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input
   vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}

/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt);
  if (TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
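
/* The statement emitted above has the form

     data_ref = VEC_PERM_EXPR <x, y, mask_vec>;

   where selector values smaller than the number of vector elements pick
   lanes from X and the remaining values pick the corresponding lanes
   from Y.  */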

/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can then be moved),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple *stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases, when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
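
/* For example (sketch): if STMT uses _1 where "_1 = x_4 * 2" is defined
   inside LOOP but that definition itself only uses values defined outside
   the loop, the second walk above moves "_1 = x_4 * 2" onto the preheader
   edge.  A PHI definition or a deeper use web instead makes the function
   return false before anything is moved.  */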

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

7371static bool
355fe088 7372vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2
RB
7373 slp_tree slp_node, slp_instance slp_node_instance,
7374 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7375{
7376 tree scalar_dest;
7377 tree vec_dest = NULL;
7378 tree data_ref = NULL;
7379 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7380 stmt_vec_info prev_stmt_info;
ebfd146a 7381 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7382 struct loop *loop = NULL;
ebfd146a 7383 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7384 bool nested_in_vect_loop = false;
c716e67f 7385 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7386 tree elem_type;
ebfd146a 7387 tree new_temp;
ef4bddc2 7388 machine_mode mode;
355fe088 7389 gimple *new_stmt = NULL;
ebfd146a
IR
7390 tree dummy;
7391 enum dr_alignment_support alignment_support_scheme;
7392 tree dataref_ptr = NULL_TREE;
74bf76ed 7393 tree dataref_offset = NULL_TREE;
355fe088 7394 gimple *ptr_incr = NULL;
ebfd146a 7395 int ncopies;
4d694b27
RS
7396 int i, j;
7397 unsigned int group_size;
7398 poly_uint64 group_gap_adj;
ebfd146a
IR
7399 tree msq = NULL_TREE, lsq;
7400 tree offset = NULL_TREE;
356bbc4c 7401 tree byte_offset = NULL_TREE;
ebfd146a 7402 tree realignment_token = NULL_TREE;
538dd0b7 7403 gphi *phi = NULL;
6e1aa848 7404 vec<tree> dr_chain = vNULL;
0d0293ac 7405 bool grouped_load = false;
355fe088 7406 gimple *first_stmt;
4f0a0218 7407 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
7408 bool inv_p;
7409 bool compute_in_loop = false;
7410 struct loop *at_loop;
7411 int vec_num;
7412 bool slp = (slp_node != NULL);
7413 bool slp_perm = false;
a70d6342 7414 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7415 poly_uint64 vf;
272c6793 7416 tree aggr_type;
134c85ca 7417 gather_scatter_info gs_info;
310213d4 7418 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7419 tree ref_type;
929b4411 7420 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7421
465c8c19
JJ
7422 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7423 return false;
7424
66c16fd9
RB
7425 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7426 && ! vec_stmt)
465c8c19
JJ
7427 return false;
7428
c3a8f964
RS
7429 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7430 if (is_gimple_assign (stmt))
7431 {
7432 scalar_dest = gimple_assign_lhs (stmt);
7433 if (TREE_CODE (scalar_dest) != SSA_NAME)
7434 return false;
465c8c19 7435
c3a8f964
RS
7436 tree_code code = gimple_assign_rhs_code (stmt);
7437 if (code != ARRAY_REF
7438 && code != BIT_FIELD_REF
7439 && code != INDIRECT_REF
7440 && code != COMPONENT_REF
7441 && code != IMAGPART_EXPR
7442 && code != REALPART_EXPR
7443 && code != MEM_REF
7444 && TREE_CODE_CLASS (code) != tcc_declaration)
7445 return false;
7446 }
7447 else
7448 {
7449 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7450 if (!call || !gimple_call_internal_p (call))
7451 return false;
7452
7453 internal_fn ifn = gimple_call_internal_fn (call);
7454 if (!internal_load_fn_p (ifn))
c3a8f964 7455 return false;
465c8c19 7456
c3a8f964
RS
7457 scalar_dest = gimple_call_lhs (call);
7458 if (!scalar_dest)
7459 return false;
7460
7461 if (slp_node != NULL)
7462 {
7463 if (dump_enabled_p ())
7464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7465 "SLP of masked loads not supported.\n");
7466 return false;
7467 }
7468
bfaa08b7
RS
7469 int mask_index = internal_fn_mask_index (ifn);
7470 if (mask_index >= 0)
7471 {
7472 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7473 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7474 &mask_vectype))
bfaa08b7
RS
7475 return false;
7476 }
c3a8f964 7477 }
465c8c19
JJ
7478
7479 if (!STMT_VINFO_DATA_REF (stmt_info))
7480 return false;
7481
7482 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7483 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7484
a70d6342
IR
7485 if (loop_vinfo)
7486 {
7487 loop = LOOP_VINFO_LOOP (loop_vinfo);
7488 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7489 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7490 }
7491 else
3533e503 7492 vf = 1;
ebfd146a
IR
7493
7494 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7495 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7496 case of SLP. */
fce57248 7497 if (slp)
ebfd146a
IR
7498 ncopies = 1;
7499 else
e8f142e2 7500 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7501
7502 gcc_assert (ncopies >= 1);
7503
7504 /* FORNOW. This restriction should be relaxed. */
7505 if (nested_in_vect_loop && ncopies > 1)
7506 {
73fbfcad 7507 if (dump_enabled_p ())
78c60e3d 7508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7509 "multiple types in nested loop.\n");
ebfd146a
IR
7510 return false;
7511 }
7512
f2556b68
RB
7513 /* Invalidate assumptions made by dependence analysis when vectorization
7514 on the unrolled body effectively re-orders stmts. */
7515 if (ncopies > 1
7516 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7517 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7518 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7519 {
7520 if (dump_enabled_p ())
7521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7522 "cannot perform implicit CSE when unrolling "
7523 "with negative dependence distance\n");
7524 return false;
7525 }
7526
7b7b1813 7527 elem_type = TREE_TYPE (vectype);
947131ba 7528 mode = TYPE_MODE (vectype);
ebfd146a
IR
7529
7530 /* FORNOW. In some cases can vectorize even if data-type not supported
7531 (e.g. - data copies). */
947131ba 7532 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7533 {
73fbfcad 7534 if (dump_enabled_p ())
78c60e3d 7535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7536 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7537 return false;
7538 }
7539
ebfd146a 7540 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7541 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7542 {
0d0293ac 7543 grouped_load = true;
ebfd146a 7544 /* FORNOW */
2de001ee
RS
7545 gcc_assert (!nested_in_vect_loop);
7546 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7547
e14c1050 7548 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 7549 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7550
b1af7da6
RB
7551 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7552 slp_perm = true;
7553
f2556b68
RB
7554 /* Invalidate assumptions made by dependence analysis when vectorization
7555 on the unrolled body effectively re-orders stmts. */
7556 if (!PURE_SLP_STMT (stmt_info)
7557 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7558 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7559 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7560 {
7561 if (dump_enabled_p ())
7562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7563 "cannot perform implicit CSE when performing "
7564 "group loads with negative dependence distance\n");
7565 return false;
7566 }
96bb56b2
RB
7567
7568 /* Similarly when the stmt is a load that is both part of a SLP
7569 instance and a loop vectorized stmt via the same-dr mechanism
7570 we have to give up. */
7571 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7572 && (STMT_SLP_TYPE (stmt_info)
7573 != STMT_SLP_TYPE (vinfo_for_stmt
7574 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7575 {
7576 if (dump_enabled_p ())
7577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7578 "conflicting SLP types for CSEd load\n");
7579 return false;
7580 }
ebfd146a 7581 }
7cfb4d93
RS
7582 else
7583 group_size = 1;
ebfd146a 7584
2de001ee 7585 vect_memory_access_type memory_access_type;
7e11fc7f 7586 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7587 &memory_access_type, &gs_info))
7588 return false;
a1e53f3f 7589
c3a8f964
RS
7590 if (mask)
7591 {
7592 if (memory_access_type == VMAT_CONTIGUOUS)
7593 {
7e11fc7f
RS
7594 machine_mode vec_mode = TYPE_MODE (vectype);
7595 if (!VECTOR_MODE_P (vec_mode)
7596 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7597 TYPE_MODE (mask_vectype), true))
7598 return false;
7599 }
bfaa08b7 7600 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7601 {
7602 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7603 tree masktype
7604 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7605 if (TREE_CODE (masktype) == INTEGER_TYPE)
7606 {
7607 if (dump_enabled_p ())
7608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7609 "masked gather with integer mask not"
7610 " supported.");
7611 return false;
7612 }
7613 }
bfaa08b7
RS
7614 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7615 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7616 {
7617 if (dump_enabled_p ())
7618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7619 "unsupported access type for masked load.\n");
7620 return false;
7621 }
7622 }
7623
ebfd146a
IR
7624 if (!vec_stmt) /* transformation not required. */
7625 {
2de001ee
RS
7626 if (!slp)
7627 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7628
7629 if (loop_vinfo
7630 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7631 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7632 memory_access_type, &gs_info);
7cfb4d93 7633
ebfd146a 7634 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7635 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7636 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7637 return true;
7638 }
7639
2de001ee
RS
7640 if (!slp)
7641 gcc_assert (memory_access_type
7642 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7643
73fbfcad 7644 if (dump_enabled_p ())
78c60e3d 7645 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7646 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7647
67b8dbac 7648 /* Transform. */
ebfd146a 7649
f702e7d4 7650 ensure_base_align (dr);
c716e67f 7651
bfaa08b7 7652 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7653 {
929b4411
RS
7654 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7655 mask_dt);
aec7ae7d
JJ
7656 return true;
7657 }
2de001ee
RS
7658
7659 if (memory_access_type == VMAT_ELEMENTWISE
7660 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7661 {
7662 gimple_stmt_iterator incr_gsi;
7663 bool insert_after;
355fe088 7664 gimple *incr;
7d75abc8 7665 tree offvar;
7d75abc8
MM
7666 tree ivstep;
7667 tree running_off;
9771b263 7668 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7669 tree stride_base, stride_step, alias_off;
4d694b27
RS
7670 /* Checked by get_load_store_type. */
7671 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7672 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7673
7cfb4d93 7674 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7675 gcc_assert (!nested_in_vect_loop);
7d75abc8 7676
b210f45f 7677 if (grouped_load)
44fc7854
BE
7678 {
7679 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7680 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7681 }
ab313a8c 7682 else
44fc7854
BE
7683 {
7684 first_stmt = stmt;
7685 first_dr = dr;
b210f45f
RB
7686 }
7687 if (slp && grouped_load)
7688 {
7689 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7690 ref_type = get_group_alias_ptr_type (first_stmt);
7691 }
7692 else
7693 {
7694 if (grouped_load)
7695 cst_offset
7696 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7697 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7698 group_size = 1;
b210f45f 7699 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7700 }
ab313a8c 7701
14ac6aa2
RB
7702 stride_base
7703 = fold_build_pointer_plus
ab313a8c 7704 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7705 size_binop (PLUS_EXPR,
ab313a8c
RB
7706 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7707 convert_to_ptrofftype (DR_INIT (first_dr))));
7708 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7709
7710 /* For a load with loop-invariant (but other than power-of-2)
7711 stride (i.e. not a grouped access) like so:
7712
7713 for (i = 0; i < n; i += stride)
7714 ... = array[i];
7715
7716 we generate a new induction variable and new accesses to
7717 form a new vector (or vectors, depending on ncopies):
7718
7719 for (j = 0; ; j += VF*stride)
7720 tmp1 = array[j];
7721 tmp2 = array[j + stride];
7722 ...
7723 vectemp = {tmp1, tmp2, ...}
7724 */
7725
ab313a8c
RB
7726 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7727 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7728
7729 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7730
b210f45f
RB
7731 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7732 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7733 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7734 loop, &incr_gsi, insert_after,
7735 &offvar, NULL);
7736 incr = gsi_stmt (incr_gsi);
310213d4 7737 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7738
b210f45f 7739 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7740
7741 prev_stmt_info = NULL;
7742 running_off = offvar;
44fc7854 7743 alias_off = build_int_cst (ref_type, 0);
4d694b27 7744 int nloads = const_nunits;
e09b4c37 7745 int lnel = 1;
7b5fc413 7746 tree ltype = TREE_TYPE (vectype);
ea60dd34 7747 tree lvectype = vectype;
b266b968 7748 auto_vec<tree> dr_chain;
2de001ee 7749 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7750 {
4d694b27 7751 if (group_size < const_nunits)
e09b4c37 7752 {
ff03930a
JJ
7753 /* First check if vec_init optab supports construction from
7754 vector elts directly. */
b397965c 7755 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7756 machine_mode vmode;
7757 if (mode_for_vector (elmode, group_size).exists (&vmode)
7758 && VECTOR_MODE_P (vmode)
414fef4e 7759 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7760 && (convert_optab_handler (vec_init_optab,
7761 TYPE_MODE (vectype), vmode)
7762 != CODE_FOR_nothing))
ea60dd34 7763 {
4d694b27 7764 nloads = const_nunits / group_size;
ea60dd34 7765 lnel = group_size;
ff03930a
JJ
7766 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7767 }
7768 else
7769 {
7770 /* Otherwise avoid emitting a constructor of vector elements
7771 by performing the loads using an integer type of the same
7772 size, constructing a vector of those and then
7773 re-interpreting it as the original vector type.
7774 This avoids a huge runtime penalty due to the general
7775 inability to perform store forwarding from smaller stores
7776 to a larger load. */
7777 unsigned lsize
7778 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7779 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7780 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7781 /* If we can't construct such a vector fall back to
7782 element loads of the original vector type. */
4d694b27 7783 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7784 && VECTOR_MODE_P (vmode)
414fef4e 7785 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7786 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7787 != CODE_FOR_nothing))
7788 {
4d694b27 7789 nloads = lnunits;
ff03930a
JJ
7790 lnel = group_size;
7791 ltype = build_nonstandard_integer_type (lsize, 1);
7792 lvectype = build_vector_type (ltype, nloads);
7793 }
ea60dd34 7794 }
e09b4c37 7795 }
2de001ee 7796 else
e09b4c37 7797 {
ea60dd34 7798 nloads = 1;
4d694b27 7799 lnel = const_nunits;
e09b4c37 7800 ltype = vectype;
e09b4c37 7801 }
2de001ee
RS
7802 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7803 }
bb4e4747
BC
7804 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7805 else if (nloads == 1)
7806 ltype = vectype;
7807
2de001ee
RS
7808 if (slp)
7809 {
66c16fd9
RB
7810 /* For SLP permutation support we need to load the whole group,
7811 not only the number of vector stmts the permutation result
7812 fits in. */
b266b968 7813 if (slp_perm)
66c16fd9 7814 {
d9f21f6a
RS
7815 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7816 variable VF. */
7817 unsigned int const_vf = vf.to_constant ();
4d694b27 7818 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7819 dr_chain.create (ncopies);
7820 }
7821 else
7822 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7823 }
4d694b27 7824 unsigned int group_el = 0;
e09b4c37
RB
7825 unsigned HOST_WIDE_INT
7826 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7827 for (j = 0; j < ncopies; j++)
7828 {
7b5fc413 7829 if (nloads > 1)
e09b4c37
RB
7830 vec_alloc (v, nloads);
7831 for (i = 0; i < nloads; i++)
7b5fc413 7832 {
e09b4c37 7833 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7834 group_el * elsz + cst_offset);
19986382
RB
7835 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7836 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7837 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7838 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7839 if (nloads > 1)
7840 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7841 gimple_assign_lhs (new_stmt));
7842
7843 group_el += lnel;
7844 if (! slp
7845 || group_el == group_size)
7b5fc413 7846 {
e09b4c37
RB
7847 tree newoff = copy_ssa_name (running_off);
7848 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7849 running_off, stride_step);
7b5fc413
RB
7850 vect_finish_stmt_generation (stmt, incr, gsi);
7851
7852 running_off = newoff;
e09b4c37 7853 group_el = 0;
7b5fc413 7854 }
7b5fc413 7855 }
e09b4c37 7856 if (nloads > 1)
7d75abc8 7857 {
ea60dd34
RB
7858 tree vec_inv = build_constructor (lvectype, v);
7859 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7860 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7861 if (lvectype != vectype)
7862 {
7863 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7864 VIEW_CONVERT_EXPR,
7865 build1 (VIEW_CONVERT_EXPR,
7866 vectype, new_temp));
7867 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7868 }
7d75abc8
MM
7869 }
7870
7b5fc413 7871 if (slp)
b266b968 7872 {
b266b968
RB
7873 if (slp_perm)
7874 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7875 else
7876 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7877 }
7d75abc8 7878 else
225ce44b
RB
7879 {
7880 if (j == 0)
7881 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7882 else
7883 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7884 prev_stmt_info = vinfo_for_stmt (new_stmt);
7885 }
7d75abc8 7886 }
b266b968 7887 if (slp_perm)
29afecdf
RB
7888 {
7889 unsigned n_perms;
7890 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7891 slp_node_instance, false, &n_perms);
7892 }
7d75abc8
MM
7893 return true;
7894 }
aec7ae7d 7895
b5ec4de7
RS
7896 if (memory_access_type == VMAT_GATHER_SCATTER
7897 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7898 grouped_load = false;
7899
0d0293ac 7900 if (grouped_load)
ebfd146a 7901 {
e14c1050 7902 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7903 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7904 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7905 without permutation. */
7906 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7907 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7908 /* For BB vectorization always use the first stmt to base
7909 the data ref pointer on. */
7910 if (bb_vinfo)
7911 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7912
ebfd146a 7913 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7914 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7915 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7916	 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7917	    ??? But we can only do so if there is exactly one,
7918	    as we have no way to get at the rest.  Leave the CSE
7919	    opportunity alone.
7920	    ??? With the group load eventually participating
7921	    in multiple different permutations (having multiple
7922	    SLP nodes which refer to the same group) the CSE
	    would even produce wrong code.  See PR56270.  */
7923 && !slp)
ebfd146a
IR
7924 {
7925 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7926 return true;
7927 }
7928 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7929 group_gap_adj = 0;
ebfd146a
IR
7930
7931 /* VEC_NUM is the number of vect stmts to be created for this group. */
7932 if (slp)
7933 {
0d0293ac 7934 grouped_load = false;
91ff1504
RB
7935 /* For SLP permutation support we need to load the whole group,
7936 not only the number of vector stmts the permutation result
7937 fits in. */
7938 if (slp_perm)
b267968e 7939 {
d9f21f6a
RS
7940 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7941 variable VF. */
7942 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7943 unsigned int const_nunits = nunits.to_constant ();
7944 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7945 group_gap_adj = vf * group_size - nunits * vec_num;
7946 }
91ff1504 7947 else
b267968e
RB
7948 {
7949 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7950 group_gap_adj
7951 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7952 }
a70d6342 7953 }
ebfd146a 7954 else
9b999e8c 7955 vec_num = group_size;
44fc7854
BE
7956
7957 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7958 }
7959 else
7960 {
7961 first_stmt = stmt;
7962 first_dr = dr;
7963 group_size = vec_num = 1;
9b999e8c 7964 group_gap_adj = 0;
44fc7854 7965 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7966 }
7967
720f5239 7968 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7969 gcc_assert (alignment_support_scheme);
70088b95
RS
7970 vec_loop_masks *loop_masks
7971 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7972 ? &LOOP_VINFO_MASKS (loop_vinfo)
7973 : NULL);
7cfb4d93
RS
7974	  /* Targets with load-lane instructions must not require explicit
7975 realignment. vect_supportable_dr_alignment always returns either
7976 dr_aligned or dr_unaligned_supported for masked operations. */
7977 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7978 && !mask
70088b95 7979 && !loop_masks)
272c6793
RS
7980 || alignment_support_scheme == dr_aligned
7981 || alignment_support_scheme == dr_unaligned_supported);
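  /* When the loop is fully masked, every load emitted below is
     conditional on a loop mask, and for IFN_MASK_LOAD additionally on
     the user-supplied mask; prepare_load_store_mask combines the two
     with a bitwise AND.  Sketch (illustrative SSA names, 4-lane
     vectors, 5 scalar iterations):

       loop_mask_1 = { -1, -1, -1, -1 }   first copy, all lanes active
       loop_mask_2 = { -1, 0, 0, 0 }      last copy, one lane active
       final_mask_3 = loop_mask_2 & vec_mask_4;
       vect__5 = .MASK_LOAD (dataref_ptr_6, align, final_mask_3);  */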
ebfd146a
IR
7982
7983 /* In case the vectorization factor (VF) is bigger than the number
7984 of elements that we can fit in a vectype (nunits), we have to generate
7985 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7986 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7987 from one copy of the vector stmt to the next, in the field
ff802fa1 7988 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7989 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7990 stmts that use the defs of the current stmt. The example below
7991 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7992 need to create 4 vectorized stmts):
ebfd146a
IR
7993
7994 before vectorization:
7995 RELATED_STMT VEC_STMT
7996 S1: x = memref - -
7997 S2: z = x + 1 - -
7998
7999 step 1: vectorize stmt S1:
8000 We first create the vector stmt VS1_0, and, as usual, record a
8001 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8002 Next, we create the vector stmt VS1_1, and record a pointer to
8003 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 8004 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
8005 stmts and pointers:
8006 RELATED_STMT VEC_STMT
8007 VS1_0: vx0 = memref0 VS1_1 -
8008 VS1_1: vx1 = memref1 VS1_2 -
8009 VS1_2: vx2 = memref2 VS1_3 -
8010 VS1_3: vx3 = memref3 - -
8011 S1: x = load - VS1_0
8012 S2: z = x + 1 - -
8013
b8698a0f
L
8014	 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8015	 information we recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
8016 stmt S2. */
8017
0d0293ac 8018 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
8019
8020 S1: x2 = &base + 2
8021 S2: x0 = &base
8022 S3: x1 = &base + 1
8023 S4: x3 = &base + 3
8024
b8698a0f 8025 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
8026 starting from the access of the first stmt of the chain:
8027
8028 VS1: vx0 = &base
8029 VS2: vx1 = &base + vec_size*1
8030 VS3: vx3 = &base + vec_size*2
8031 VS4: vx4 = &base + vec_size*3
8032
8033 Then permutation statements are generated:
8034
e2c83630
RH
8035 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8036 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
8037 ...
8038
8039 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8040 (the order of the data-refs in the output of vect_permute_load_chain
8041 corresponds to the order of scalar stmts in the interleaving chain - see
8042 the documentation of vect_permute_load_chain()).
8043 The generation of permutation stmts and recording them in
0d0293ac 8044 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8045
b8698a0f 8046 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8047 permutation stmts above are created for every copy. The result vector
8048 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8049 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
8050
8051 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8052 on a target that supports unaligned accesses (dr_unaligned_supported)
8053 we generate the following code:
8054 p = initial_addr;
8055 indx = 0;
8056 loop {
8057 p = p + indx * vectype_size;
8058 vec_dest = *(p);
8059 indx = indx + 1;
8060 }
8061
8062 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8063 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8064 then generate the following code, in which the data in each iteration is
8065 obtained by two vector loads, one from the previous iteration, and one
8066 from the current iteration:
8067 p1 = initial_addr;
8068 msq_init = *(floor(p1))
8069 p2 = initial_addr + VS - 1;
8070 realignment_token = call target_builtin;
8071 indx = 0;
8072 loop {
8073 p2 = p2 + indx * vectype_size
8074 lsq = *(floor(p2))
8075 vec_dest = realign_load (msq, lsq, realignment_token)
8076 indx = indx + 1;
8077 msq = lsq;
8078 } */
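  /* Concretely (illustrative addresses, 16-byte vectors): for
     initial_addr == 0x1004 the first load reads floor (0x1004) ==
     0x1000 .. 0x100f into msq, the second reads floor (0x1004 + 15)
     == 0x1010 .. 0x101f into lsq, and realign_load combines the two,
     steered by the realignment token, into the bytes
     0x1004 .. 0x1013 that the misaligned access wanted.  */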
8079
8080 /* If the misalignment remains the same throughout the execution of the
8081 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8082 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8083 This can only occur when vectorizing memory accesses in the inner-loop
8084 nested within an outer-loop that is being vectorized. */
8085
d1e4b493 8086 if (nested_in_vect_loop
cf098191
RS
8087 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8088 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8089 {
8090 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8091 compute_in_loop = true;
8092 }
8093
8094 if ((alignment_support_scheme == dr_explicit_realign_optimized
8095 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8096 && !compute_in_loop)
ebfd146a
IR
8097 {
8098 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8099 alignment_support_scheme, NULL_TREE,
8100 &at_loop);
8101 if (alignment_support_scheme == dr_explicit_realign_optimized)
8102 {
538dd0b7 8103 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8104 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8105 size_one_node);
ebfd146a
IR
8106 }
8107 }
8108 else
8109 at_loop = loop;
8110
62da9e14 8111 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8112 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8113
ab2fc782
RS
8114 tree bump;
8115 tree vec_offset = NULL_TREE;
8116 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8117 {
8118 aggr_type = NULL_TREE;
8119 bump = NULL_TREE;
8120 }
8121 else if (memory_access_type == VMAT_GATHER_SCATTER)
8122 {
8123 aggr_type = elem_type;
8124 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8125 &bump, &vec_offset);
8126 }
272c6793 8127 else
ab2fc782
RS
8128 {
8129 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8130 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8131 else
8132 aggr_type = vectype;
8133 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8134 }
272c6793 8135
c3a8f964 8136 tree vec_mask = NULL_TREE;
ebfd146a 8137 prev_stmt_info = NULL;
4d694b27 8138 poly_uint64 group_elt = 0;
ebfd146a 8139 for (j = 0; j < ncopies; j++)
b8698a0f 8140 {
272c6793 8141 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8142 if (j == 0)
74bf76ed
JJ
8143 {
8144 bool simd_lane_access_p
8145 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8146 if (simd_lane_access_p
8147 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8148 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8149 && integer_zerop (DR_OFFSET (first_dr))
8150 && integer_zerop (DR_INIT (first_dr))
8151 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8152 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8153 && (alignment_support_scheme == dr_aligned
8154 || alignment_support_scheme == dr_unaligned_supported))
8155 {
8156 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8157 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8158 inv_p = false;
74bf76ed 8159 }
4f0a0218
RB
8160 else if (first_stmt_for_drptr
8161 && first_stmt != first_stmt_for_drptr)
8162 {
8163 dataref_ptr
8164 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8165 at_loop, offset, &dummy, gsi,
8166 &ptr_incr, simd_lane_access_p,
ab2fc782 8167 &inv_p, byte_offset, bump);
4f0a0218
RB
8168 /* Adjust the pointer by the difference to first_stmt. */
8169 data_reference_p ptrdr
8170 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8171 tree diff = fold_convert (sizetype,
8172 size_binop (MINUS_EXPR,
8173 DR_INIT (first_dr),
8174 DR_INIT (ptrdr)));
8175 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8176 stmt, diff);
8177 }
bfaa08b7
RS
8178 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8179 {
8180 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8181 &dataref_ptr, &vec_offset);
8182 inv_p = false;
8183 }
74bf76ed
JJ
8184 else
8185 dataref_ptr
8186 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8187 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8188 simd_lane_access_p, &inv_p,
ab2fc782 8189 byte_offset, bump);
c3a8f964
RS
8190 if (mask)
8191 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8192 mask_vectype);
74bf76ed 8193 }
ebfd146a 8194 else
c3a8f964
RS
8195 {
8196 if (dataref_offset)
8197 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8198 bump);
bfaa08b7 8199 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8200 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8201 vec_offset);
c3a8f964 8202 else
ab2fc782
RS
8203 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8204 stmt, bump);
c3a8f964 8205 if (mask)
929b4411 8206 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8207 }
ebfd146a 8208
0d0293ac 8209 if (grouped_load || slp_perm)
9771b263 8210 dr_chain.create (vec_num);
5ce1ee7f 8211
2de001ee 8212 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8213 {
272c6793
RS
8214 tree vec_array;
8215
8216 vec_array = create_vector_array (vectype, vec_num);
8217
7cfb4d93 8218 tree final_mask = NULL_TREE;
70088b95
RS
8219 if (loop_masks)
8220 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8221 vectype, j);
7cfb4d93
RS
8222 if (vec_mask)
8223 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8224 vec_mask, gsi);
8225
7e11fc7f 8226 gcall *call;
7cfb4d93 8227 if (final_mask)
7e11fc7f
RS
8228 {
8229 /* Emit:
8230 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8231 VEC_MASK). */
8232 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8233 tree alias_ptr = build_int_cst (ref_type, align);
8234 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8235 dataref_ptr, alias_ptr,
7cfb4d93 8236 final_mask);
7e11fc7f
RS
8237 }
8238 else
8239 {
8240 /* Emit:
8241 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8242 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8243 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8244 }
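	  /* Sketch of the effect (illustrative; targets provide this via
	     instructions such as AArch64's ld2/ld3/ld4): for a group of
	     two vectors,

	       vect_array = .LOAD_LANES (MEM <int[8]> [(int *)dataref_ptr]);

	     de-interleaves on the fly, so vect_array[0] receives elements
	     0, 2, 4, 6 and vect_array[1] elements 1, 3, 5, 7, with no
	     separate VEC_PERM_EXPRs needed.  */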
a844293d
RS
8245 gimple_call_set_lhs (call, vec_array);
8246 gimple_call_set_nothrow (call, true);
8247 new_stmt = call;
272c6793 8248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8249
272c6793
RS
8250 /* Extract each vector into an SSA_NAME. */
8251 for (i = 0; i < vec_num; i++)
ebfd146a 8252 {
272c6793
RS
8253 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8254 vec_array, i);
9771b263 8255 dr_chain.quick_push (new_temp);
272c6793
RS
8256 }
8257
8258 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8259 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8260
8261 /* Record that VEC_ARRAY is now dead. */
8262 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8263 }
8264 else
8265 {
8266 for (i = 0; i < vec_num; i++)
8267 {
7cfb4d93 8268 tree final_mask = NULL_TREE;
70088b95 8269 if (loop_masks
7cfb4d93 8270 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8271 final_mask = vect_get_loop_mask (gsi, loop_masks,
8272 vec_num * ncopies,
7cfb4d93
RS
8273 vectype, vec_num * j + i);
8274 if (vec_mask)
8275 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8276 vec_mask, gsi);
8277
272c6793
RS
8278 if (i > 0)
8279 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8280 stmt, bump);
272c6793
RS
8281
8282 /* 2. Create the vector-load in the loop. */
8283 switch (alignment_support_scheme)
8284 {
8285 case dr_aligned:
8286 case dr_unaligned_supported:
be1ac4ec 8287 {
644ffefd
MJ
8288 unsigned int align, misalign;
8289
bfaa08b7
RS
8290 if (memory_access_type == VMAT_GATHER_SCATTER)
8291 {
8292 tree scale = size_int (gs_info.scale);
8293 gcall *call;
70088b95 8294 if (loop_masks)
bfaa08b7
RS
8295 call = gimple_build_call_internal
8296 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8297 vec_offset, scale, final_mask);
8298 else
8299 call = gimple_build_call_internal
8300 (IFN_GATHER_LOAD, 3, dataref_ptr,
8301 vec_offset, scale);
8302 gimple_call_set_nothrow (call, true);
8303 new_stmt = call;
8304 data_ref = NULL_TREE;
8305 break;
8306 }
8307
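		    /* Semantics of the emitted calls, as a sketch
		       (illustrative operand names):

			 .GATHER_LOAD (base_1, offsets_2, scale)

		       loads one element from base_1 + offsets_2[i] * scale
		       for each lane i, while .MASK_GATHER_LOAD takes the
		       additional mask operand and loads only the lanes
		       whose mask bit is set.  */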
f702e7d4 8308 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8309 if (alignment_support_scheme == dr_aligned)
8310 {
8311 gcc_assert (aligned_access_p (first_dr));
644ffefd 8312 misalign = 0;
272c6793
RS
8313 }
8314 else if (DR_MISALIGNMENT (first_dr) == -1)
8315 {
25f68d90 8316 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8317 misalign = 0;
272c6793
RS
8318 }
8319 else
c3a8f964 8320 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8321 if (dataref_offset == NULL_TREE
8322 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8323 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8324 align, misalign);
c3a8f964 8325
7cfb4d93 8326 if (final_mask)
c3a8f964
RS
8327 {
8328 align = least_bit_hwi (misalign | align);
8329 tree ptr = build_int_cst (ref_type, align);
8330 gcall *call
8331 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8332 dataref_ptr, ptr,
7cfb4d93 8333 final_mask);
c3a8f964
RS
8334 gimple_call_set_nothrow (call, true);
8335 new_stmt = call;
8336 data_ref = NULL_TREE;
8337 }
8338 else
8339 {
8340 data_ref
8341 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8342 dataref_offset
8343 ? dataref_offset
8344 : build_int_cst (ref_type, 0));
8345 if (alignment_support_scheme == dr_aligned)
8346 ;
8347 else if (DR_MISALIGNMENT (first_dr) == -1)
8348 TREE_TYPE (data_ref)
8349 = build_aligned_type (TREE_TYPE (data_ref),
8350 align * BITS_PER_UNIT);
8351 else
8352 TREE_TYPE (data_ref)
8353 = build_aligned_type (TREE_TYPE (data_ref),
8354 TYPE_ALIGN (elem_type));
8355 }
272c6793 8356 break;
be1ac4ec 8357 }
272c6793 8358 case dr_explicit_realign:
267d3070 8359 {
272c6793 8360 tree ptr, bump;
272c6793 8361
d88981fc 8362 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8363
8364 if (compute_in_loop)
8365 msq = vect_setup_realignment (first_stmt, gsi,
8366 &realignment_token,
8367 dr_explicit_realign,
8368 dataref_ptr, NULL);
8369
aed93b23
RB
8370 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8371 ptr = copy_ssa_name (dataref_ptr);
8372 else
8373 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8374 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8375 new_stmt = gimple_build_assign
8376 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8377 build_int_cst
8378 (TREE_TYPE (dataref_ptr),
f702e7d4 8379 -(HOST_WIDE_INT) align));
272c6793
RS
8380 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8381 data_ref
8382 = build2 (MEM_REF, vectype, ptr,
44fc7854 8383 build_int_cst (ref_type, 0));
19986382 8384 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8385 vec_dest = vect_create_destination_var (scalar_dest,
8386 vectype);
8387 new_stmt = gimple_build_assign (vec_dest, data_ref);
8388 new_temp = make_ssa_name (vec_dest, new_stmt);
8389 gimple_assign_set_lhs (new_stmt, new_temp);
8390 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8391 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8392 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8393 msq = new_temp;
8394
d88981fc 8395 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8396 TYPE_SIZE_UNIT (elem_type));
d88981fc 8397 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8398 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8399 new_stmt = gimple_build_assign
8400 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8401 build_int_cst
f702e7d4 8402 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8403 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8404 gimple_assign_set_lhs (new_stmt, ptr);
8405 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8406 data_ref
8407 = build2 (MEM_REF, vectype, ptr,
44fc7854 8408 build_int_cst (ref_type, 0));
272c6793 8409 break;
267d3070 8410 }
272c6793 8411 case dr_explicit_realign_optimized:
f702e7d4
RS
8412 {
8413 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8414 new_temp = copy_ssa_name (dataref_ptr);
8415 else
8416 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8417 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8418 new_stmt = gimple_build_assign
8419 (new_temp, BIT_AND_EXPR, dataref_ptr,
8420 build_int_cst (TREE_TYPE (dataref_ptr),
8421 -(HOST_WIDE_INT) align));
8422 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8423 data_ref
8424 = build2 (MEM_REF, vectype, new_temp,
8425 build_int_cst (ref_type, 0));
8426 break;
8427 }
272c6793
RS
8428 default:
8429 gcc_unreachable ();
8430 }
ebfd146a 8431 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8432 /* DATA_REF is null if we've already built the statement. */
8433 if (data_ref)
19986382
RB
8434 {
8435 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8436 new_stmt = gimple_build_assign (vec_dest, data_ref);
8437 }
ebfd146a 8438 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8439 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8441
272c6793
RS
8442 /* 3. Handle explicit realignment if necessary/supported.
8443 Create in loop:
8444 vec_dest = realign_load (msq, lsq, realignment_token) */
8445 if (alignment_support_scheme == dr_explicit_realign_optimized
8446 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8447 {
272c6793
RS
8448 lsq = gimple_assign_lhs (new_stmt);
8449 if (!realignment_token)
8450 realignment_token = dataref_ptr;
8451 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8452 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8453 msq, lsq, realignment_token);
272c6793
RS
8454 new_temp = make_ssa_name (vec_dest, new_stmt);
8455 gimple_assign_set_lhs (new_stmt, new_temp);
8456 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8457
8458 if (alignment_support_scheme == dr_explicit_realign_optimized)
8459 {
8460 gcc_assert (phi);
8461 if (i == vec_num - 1 && j == ncopies - 1)
8462 add_phi_arg (phi, lsq,
8463 loop_latch_edge (containing_loop),
9e227d60 8464 UNKNOWN_LOCATION);
272c6793
RS
8465 msq = lsq;
8466 }
ebfd146a 8467 }
ebfd146a 8468
59fd17e3
RB
8469 /* 4. Handle invariant-load. */
8470 if (inv_p && !bb_vinfo)
8471 {
59fd17e3 8472 gcc_assert (!grouped_load);
d1417442
JJ
8473 /* If we have versioned for aliasing or the loop doesn't
8474 have any data dependencies that would preclude this,
8475 then we are sure this is a loop invariant load and
8476 thus we can insert it on the preheader edge. */
8477 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8478 && !nested_in_vect_loop
6b916b36 8479 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8480 {
8481 if (dump_enabled_p ())
8482 {
8483 dump_printf_loc (MSG_NOTE, vect_location,
8484 "hoisting out of the vectorized "
8485 "loop: ");
8486 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8487 }
b731b390 8488 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8489 gsi_insert_on_edge_immediate
8490 (loop_preheader_edge (loop),
8491 gimple_build_assign (tem,
8492 unshare_expr
8493 (gimple_assign_rhs1 (stmt))));
8494 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8495 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8496 set_vinfo_for_stmt (new_stmt,
8497 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8498 }
8499 else
8500 {
8501 gimple_stmt_iterator gsi2 = *gsi;
8502 gsi_next (&gsi2);
8503 new_temp = vect_init_vector (stmt, scalar_dest,
8504 vectype, &gsi2);
34cd48e5 8505 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8506 }
59fd17e3
RB
8507 }
8508
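	  /* Sketch of the hoisting (illustrative SSA names): for an
	     invariant scalar load "x_1 = *p_2" the preheader edge
	     receives

	       tem_3 = *p_2;
	       vect_cst__4 = {tem_3, tem_3, tem_3, tem_3};

	     and the loop body then uses the splatted vect_cst__4 with no
	     load left inside the loop.  */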
62da9e14 8509 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8510 {
aec7ae7d
JJ
8511 tree perm_mask = perm_mask_for_reverse (vectype);
8512 new_temp = permute_vec_elements (new_temp, new_temp,
8513 perm_mask, stmt, gsi);
ebfd146a
IR
8514 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8515 }
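	  /* E.g. for a four-element vector (illustrative),
	     perm_mask_for_reverse yields { 3, 2, 1, 0 } and the emitted

	       vect__1 = VEC_PERM_EXPR <vect__2, vect__2, { 3, 2, 1, 0 }>;

	     flips the forward-loaded vector into the element order that
	     the negative-stride scalar accesses expect.  */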
267d3070 8516
272c6793 8517 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8518 vect_transform_grouped_load (). */
8519 if (grouped_load || slp_perm)
9771b263 8520 dr_chain.quick_push (new_temp);
267d3070 8521
272c6793
RS
8522 /* Store vector loads in the corresponding SLP_NODE. */
8523 if (slp && !slp_perm)
9771b263 8524 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8525
8526	  /* With an SLP permutation we load the gaps as well; without
8527	     one we need to skip the gaps once we have fully loaded
8528	     all elements.  group_gap_adj is GROUP_SIZE here.  */
8529 group_elt += nunits;
d9f21f6a
RS
8530 if (maybe_ne (group_gap_adj, 0U)
8531 && !slp_perm
8532 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8533 {
d9f21f6a
RS
8534 poly_wide_int bump_val
8535 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8536 * group_gap_adj);
8e6cdc90 8537 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8538 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8539 stmt, bump);
8540 group_elt = 0;
8541 }
272c6793 8542 }
9b999e8c
RB
8543 /* Bump the vector pointer to account for a gap or for excess
8544 elements loaded for a permuted SLP load. */
d9f21f6a 8545 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8546 {
d9f21f6a
RS
8547 poly_wide_int bump_val
8548 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8549 * group_gap_adj);
8e6cdc90 8550 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8551 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8552 stmt, bump);
8553 }
ebfd146a
IR
8554 }
8555
8556 if (slp && !slp_perm)
8557 continue;
8558
8559 if (slp_perm)
8560 {
29afecdf 8561 unsigned n_perms;
01d8bf07 8562 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8563 slp_node_instance, false,
8564 &n_perms))
ebfd146a 8565 {
9771b263 8566 dr_chain.release ();
ebfd146a
IR
8567 return false;
8568 }
8569 }
8570 else
8571 {
0d0293ac 8572 if (grouped_load)
ebfd146a 8573 {
2de001ee 8574 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8575 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8576 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8577 }
8578 else
8579 {
8580 if (j == 0)
8581 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8582 else
8583 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8584 prev_stmt_info = vinfo_for_stmt (new_stmt);
8585 }
8586 }
9771b263 8587 dr_chain.release ();
ebfd146a
IR
8588 }
8589
ebfd146a
IR
8590 return true;
8591 }
8592
8593 /* Function vect_is_simple_cond.
b8698a0f 8594
ebfd146a
IR
8595 Input:
8596 LOOP - the loop that is being vectorized.
8597 COND - Condition that is checked for simple use.
8598
e9e1d143
RG
8599 Output:
8600 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8601 *DTS - The def types for the arguments of the comparison.
e9e1d143 8602
ebfd146a
IR
8603 Returns whether a COND can be vectorized.  Checks whether
8604 condition operands are supportable using vect_is_simple_use.  */
8605
87aab9b2 8606 static bool
4fc5ebf1 8607 vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8608 tree *comp_vectype, enum vect_def_type *dts,
8609 tree vectype)
ebfd146a
IR
8610 {
8611 tree lhs, rhs;
e9e1d143 8612 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8613
a414c77f
IE
8614 /* Mask case. */
8615 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8616 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
8617 {
8618 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8619 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 8620 &dts[0], comp_vectype)
a414c77f
IE
8621 || !*comp_vectype
8622 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8623 return false;
8624 return true;
8625 }
8626
ebfd146a
IR
8627 if (!COMPARISON_CLASS_P (cond))
8628 return false;
8629
8630 lhs = TREE_OPERAND (cond, 0);
8631 rhs = TREE_OPERAND (cond, 1);
8632
8633 if (TREE_CODE (lhs) == SSA_NAME)
8634 {
355fe088 8635 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 8636 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
8637 return false;
8638 }
4fc5ebf1
JG
8639 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8640 || TREE_CODE (lhs) == FIXED_CST)
8641 dts[0] = vect_constant_def;
8642 else
ebfd146a
IR
8643 return false;
8644
8645 if (TREE_CODE (rhs) == SSA_NAME)
8646 {
355fe088 8647 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 8648 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
8649 return false;
8650 }
4fc5ebf1
JG
8651 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8652 || TREE_CODE (rhs) == FIXED_CST)
8653 dts[1] = vect_constant_def;
8654 else
ebfd146a
IR
8655 return false;
8656
28b33016 8657 if (vectype1 && vectype2
928686b1
RS
8658 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8659 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8660 return false;
8661
e9e1d143 8662 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8663 /* Invariant comparison. */
4515e413 8664 if (! *comp_vectype && vectype)
8da4c8d8
RB
8665 {
8666 tree scalar_type = TREE_TYPE (lhs);
8667 /* If we can widen the comparison to match vectype do so. */
8668 if (INTEGRAL_TYPE_P (scalar_type)
8669 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8670 TYPE_SIZE (TREE_TYPE (vectype))))
8671 scalar_type = build_nonstandard_integer_type
8672 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8673 TYPE_UNSIGNED (scalar_type));
8674 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8675 }
8676
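  /* Example of the widening (illustrative): for "c_1 < c_2 ? x_3 : y_4"
     with "char" operands c_1 and c_2 but a vector(4) int VECTYPE, the
     comparison is rewritten over a 32-bit integer type of char's
     signedness, so *COMP_VECTYPE becomes vector(4) int and the mask
     has the same number of lanes as the data vectors.  */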
ebfd146a
IR
8677 return true;
8678}
8679
8680/* vectorizable_condition.
8681
b8698a0f
L
8682 Check if STMT is a conditional modify expression that can be vectorized.
8683 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8684 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8685 at GSI.
8686
8687 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8688 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
0ad23163 8689 the else clause if it is 2).
ebfd146a
IR
8690
8691 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8692
4bbe8262 8693 bool
355fe088
TS
8694 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8695 gimple **vec_stmt, tree reduc_def, int reduc_index,
68435eb2 8696 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8697 {
8698 tree scalar_dest = NULL_TREE;
8699 tree vec_dest = NULL_TREE;
01216d27
JJ
8700 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8701 tree then_clause, else_clause;
ebfd146a 8702 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8703 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8704 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8705 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8706 tree vec_compare;
ebfd146a
IR
8707 tree new_temp;
8708 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8709 enum vect_def_type dts[4]
8710 = {vect_unknown_def_type, vect_unknown_def_type,
8711 vect_unknown_def_type, vect_unknown_def_type};
8712 int ndts = 4;
f7e531cf 8713 int ncopies;
01216d27 8714 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8715 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8716 int i, j;
8717 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8718 vec<tree> vec_oprnds0 = vNULL;
8719 vec<tree> vec_oprnds1 = vNULL;
8720 vec<tree> vec_oprnds2 = vNULL;
8721 vec<tree> vec_oprnds3 = vNULL;
74946978 8722 tree vec_cmp_type;
a414c77f 8723 bool masked = false;
b8698a0f 8724
f7e531cf
IR
8725 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8726 return false;
8727
bb6c2b68
RS
8728 vect_reduction_type reduction_type
8729 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8730 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8731 {
8732 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8733 return false;
ebfd146a 8734
af29617a
AH
8735 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8736 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8737 && reduc_def))
8738 return false;
ebfd146a 8739
af29617a
AH
8740 /* FORNOW: not yet supported. */
8741 if (STMT_VINFO_LIVE_P (stmt_info))
8742 {
8743 if (dump_enabled_p ())
8744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8745 "value used after loop.\n");
8746 return false;
8747 }
ebfd146a
IR
8748 }
8749
8750 /* Is vectorizable conditional operation? */
8751 if (!is_gimple_assign (stmt))
8752 return false;
8753
8754 code = gimple_assign_rhs_code (stmt);
8755
8756 if (code != COND_EXPR)
8757 return false;
8758
465c8c19 8759 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8760 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8761
fce57248 8762 if (slp_node)
465c8c19
JJ
8763 ncopies = 1;
8764 else
e8f142e2 8765 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8766
8767 gcc_assert (ncopies >= 1);
8768 if (reduc_index && ncopies > 1)
8769 return false; /* FORNOW */
8770
4e71066d
RG
8771 cond_expr = gimple_assign_rhs1 (stmt);
8772 then_clause = gimple_assign_rhs2 (stmt);
8773 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8774
4fc5ebf1 8775 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8776 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8777 || !comp_vectype)
ebfd146a
IR
8778 return false;
8779
81c40241 8780 gimple *def_stmt;
4fc5ebf1 8781 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8782 &vectype1))
8783 return false;
4fc5ebf1 8784 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8785 &vectype2))
ebfd146a 8786 return false;
2947d3b2
IE
8787
8788 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8789 return false;
8790
8791 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8792 return false;
8793
28b33016
IE
8794 masked = !COMPARISON_CLASS_P (cond_expr);
8795 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8796
74946978
MP
8797 if (vec_cmp_type == NULL_TREE)
8798 return false;
784fb9b3 8799
01216d27
JJ
8800 cond_code = TREE_CODE (cond_expr);
8801 if (!masked)
8802 {
8803 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8804 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8805 }
8806
8807 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8808 {
8809 /* Boolean values may have another representation in vectors
8810 and therefore we prefer bit operations over comparison for
8811 them (which also works for scalar masks). We store opcodes
8812 to use in bitop1 and bitop2. Statement is vectorized as
8813 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8814 depending on bitop1 and bitop2 arity. */
8815 switch (cond_code)
8816 {
8817 case GT_EXPR:
8818 bitop1 = BIT_NOT_EXPR;
8819 bitop2 = BIT_AND_EXPR;
8820 break;
8821 case GE_EXPR:
8822 bitop1 = BIT_NOT_EXPR;
8823 bitop2 = BIT_IOR_EXPR;
8824 break;
8825 case LT_EXPR:
8826 bitop1 = BIT_NOT_EXPR;
8827 bitop2 = BIT_AND_EXPR;
8828 std::swap (cond_expr0, cond_expr1);
8829 break;
8830 case LE_EXPR:
8831 bitop1 = BIT_NOT_EXPR;
8832 bitop2 = BIT_IOR_EXPR;
8833 std::swap (cond_expr0, cond_expr1);
8834 break;
8835 case NE_EXPR:
8836 bitop1 = BIT_XOR_EXPR;
8837 break;
8838 case EQ_EXPR:
8839 bitop1 = BIT_XOR_EXPR;
8840 bitop2 = BIT_NOT_EXPR;
8841 break;
8842 default:
8843 return false;
8844 }
8845 cond_code = SSA_NAME;
8846 }
8847
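  /* With the opcodes chosen above, mask-vector comparisons expand to
     pure bit operations (sketch; each lane is all-ones for true):

       a > b   ->  a & ~b
       a >= b  ->  a | ~b
       a < b   ->  b & ~a
       a <= b  ->  b | ~a
       a != b  ->  a ^ b
       a == b  ->  ~(a ^ b)  */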
b8698a0f 8848 if (!vec_stmt)
ebfd146a 8849 {
01216d27
JJ
8850 if (bitop1 != NOP_EXPR)
8851 {
8852 machine_mode mode = TYPE_MODE (comp_vectype);
8853 optab optab;
8854
8855 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8856 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8857 return false;
8858
8859 if (bitop2 != NOP_EXPR)
8860 {
8861 optab = optab_for_tree_code (bitop2, comp_vectype,
8862 optab_default);
8863 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8864 return false;
8865 }
8866 }
4fc5ebf1
JG
8867 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8868 cond_code))
8869 {
68435eb2
RB
8870 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8871 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8872 cost_vec);
4fc5ebf1
JG
8873 return true;
8874 }
8875 return false;
ebfd146a
IR
8876 }
8877
f7e531cf
IR
8878 /* Transform. */
8879
8880 if (!slp_node)
8881 {
9771b263
DN
8882 vec_oprnds0.create (1);
8883 vec_oprnds1.create (1);
8884 vec_oprnds2.create (1);
8885 vec_oprnds3.create (1);
f7e531cf 8886 }
ebfd146a
IR
8887
8888 /* Handle def. */
8889 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8890 if (reduction_type != EXTRACT_LAST_REDUCTION)
8891 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8892
8893 /* Handle cond expr. */
a855b1b1
MM
8894 for (j = 0; j < ncopies; j++)
8895 {
bb6c2b68 8896 gimple *new_stmt = NULL;
a855b1b1
MM
8897 if (j == 0)
8898 {
f7e531cf
IR
8899 if (slp_node)
8900 {
00f96dc9
TS
8901 auto_vec<tree, 4> ops;
8902 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8903
a414c77f 8904 if (masked)
01216d27 8905 ops.safe_push (cond_expr);
a414c77f
IE
8906 else
8907 {
01216d27
JJ
8908 ops.safe_push (cond_expr0);
8909 ops.safe_push (cond_expr1);
a414c77f 8910 }
9771b263
DN
8911 ops.safe_push (then_clause);
8912 ops.safe_push (else_clause);
306b0c92 8913 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8914 vec_oprnds3 = vec_defs.pop ();
8915 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8916 if (!masked)
8917 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8918 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8919 }
8920 else
8921 {
355fe088 8922 gimple *gtemp;
a414c77f
IE
8923 if (masked)
8924 {
8925 vec_cond_lhs
8926 = vect_get_vec_def_for_operand (cond_expr, stmt,
8927 comp_vectype);
8928 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8929 &gtemp, &dts[0]);
8930 }
8931 else
8932 {
01216d27
JJ
8933 vec_cond_lhs
8934 = vect_get_vec_def_for_operand (cond_expr0,
8935 stmt, comp_vectype);
8936 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8937
8938 vec_cond_rhs
8939 = vect_get_vec_def_for_operand (cond_expr1,
8940 stmt, comp_vectype);
8941 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8942 }
f7e531cf
IR
8943 if (reduc_index == 1)
8944 vec_then_clause = reduc_def;
8945 else
8946 {
8947 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8948 stmt);
8949 vect_is_simple_use (then_clause, loop_vinfo,
8950 &gtemp, &dts[2]);
f7e531cf
IR
8951 }
8952 if (reduc_index == 2)
8953 vec_else_clause = reduc_def;
8954 else
8955 {
8956 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8957 stmt);
8958 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8959 }
a855b1b1
MM
8960 }
8961 }
8962 else
8963 {
a414c77f
IE
8964 vec_cond_lhs
8965 = vect_get_vec_def_for_stmt_copy (dts[0],
8966 vec_oprnds0.pop ());
8967 if (!masked)
8968 vec_cond_rhs
8969 = vect_get_vec_def_for_stmt_copy (dts[1],
8970 vec_oprnds1.pop ());
8971
a855b1b1 8972 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8973 vec_oprnds2.pop ());
a855b1b1 8974 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8975 vec_oprnds3.pop ());
f7e531cf
IR
8976 }
8977
8978 if (!slp_node)
8979 {
9771b263 8980 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8981 if (!masked)
8982 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8983 vec_oprnds2.quick_push (vec_then_clause);
8984 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8985 }
8986
9dc3f7de 8987 /* Arguments are ready. Create the new vector stmt. */
9771b263 8988 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8989 {
9771b263
DN
8990 vec_then_clause = vec_oprnds2[i];
8991 vec_else_clause = vec_oprnds3[i];
a855b1b1 8992
a414c77f
IE
8993 if (masked)
8994 vec_compare = vec_cond_lhs;
8995 else
8996 {
8997 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8998 if (bitop1 == NOP_EXPR)
8999 vec_compare = build2 (cond_code, vec_cmp_type,
9000 vec_cond_lhs, vec_cond_rhs);
9001 else
9002 {
9003 new_temp = make_ssa_name (vec_cmp_type);
9004 if (bitop1 == BIT_NOT_EXPR)
9005 new_stmt = gimple_build_assign (new_temp, bitop1,
9006 vec_cond_rhs);
9007 else
9008 new_stmt
9009 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9010 vec_cond_rhs);
9011 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9012 if (bitop2 == NOP_EXPR)
9013 vec_compare = new_temp;
9014 else if (bitop2 == BIT_NOT_EXPR)
9015 {
9016 /* Instead of doing ~x ? y : z do x ? z : y. */
9017 vec_compare = new_temp;
9018 std::swap (vec_then_clause, vec_else_clause);
9019 }
9020 else
9021 {
9022 vec_compare = make_ssa_name (vec_cmp_type);
9023 new_stmt
9024 = gimple_build_assign (vec_compare, bitop2,
9025 vec_cond_lhs, new_temp);
9026 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9027 }
9028 }
a414c77f 9029 }
bb6c2b68
RS
9030 if (reduction_type == EXTRACT_LAST_REDUCTION)
9031 {
9032 if (!is_gimple_val (vec_compare))
9033 {
9034 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9035 new_stmt = gimple_build_assign (vec_compare_name,
9036 vec_compare);
9037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9038 vec_compare = vec_compare_name;
9039 }
9040 gcc_assert (reduc_index == 2);
9041 new_stmt = gimple_build_call_internal
9042 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9043 vec_then_clause);
9044 gimple_call_set_lhs (new_stmt, scalar_dest);
9045 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9046 if (stmt == gsi_stmt (*gsi))
9047 vect_finish_replace_stmt (stmt, new_stmt);
9048 else
9049 {
9050 /* In this case we're moving the definition to later in the
9051 block. That doesn't matter because the only uses of the
9052 lhs are in phi statements. */
9053 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9054 gsi_remove (&old_gsi, true);
9055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9056 }
9057 }
9058 else
9059 {
9060 new_temp = make_ssa_name (vec_dest);
9061 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9062 vec_compare, vec_then_clause,
9063 vec_else_clause);
9064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9065 }
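	  /* The two forms side by side, as a sketch (illustrative names):

	       last_1 = .FOLD_EXTRACT_LAST (else_val_2, mask_3, vec_then_4);

	     yields the vec_then_4 element of the last active mask_3 lane
	     (or else_val_2 if no lane is active) and produces the scalar
	     result directly, whereas

	       vect__5 = VEC_COND_EXPR <mask_3, vec_then_4, vec_else_6>;

	     selects lane-wise between the two vector operands.  */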
f7e531cf 9066 if (slp_node)
9771b263 9067 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
9068 }
9069
9070 if (slp_node)
9071 continue;
9072
9073 if (j == 0)
9074 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9075 else
9076 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9077
9078 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 9079 }
b8698a0f 9080
9771b263
DN
9081 vec_oprnds0.release ();
9082 vec_oprnds1.release ();
9083 vec_oprnds2.release ();
9084 vec_oprnds3.release ();
f7e531cf 9085
ebfd146a
IR
9086 return true;
9087 }
9088
42fd8198
IE
9089 /* vectorizable_comparison.
9090
9091 Check if STMT is a comparison expression that can be vectorized.
9092 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9093 comparison, put it in VEC_STMT, and insert it at GSI.
9094
9095 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9096
fce57248 9097 static bool
42fd8198
IE
9098 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9099 gimple **vec_stmt, tree reduc_def,
68435eb2 9100 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9101 {
9102 tree lhs, rhs1, rhs2;
9103 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9104 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9105 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9106 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9107 tree new_temp;
9108 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9109 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9110 int ndts = 2;
928686b1 9111 poly_uint64 nunits;
42fd8198 9112 int ncopies;
49e76ff1 9113 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9114 stmt_vec_info prev_stmt_info = NULL;
9115 int i, j;
9116 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9117 vec<tree> vec_oprnds0 = vNULL;
9118 vec<tree> vec_oprnds1 = vNULL;
9119 gimple *def_stmt;
9120 tree mask_type;
9121 tree mask;
9122
c245362b
IE
9123 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9124 return false;
9125
30480bcd 9126 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9127 return false;
9128
9129 mask_type = vectype;
9130 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9131
fce57248 9132 if (slp_node)
42fd8198
IE
9133 ncopies = 1;
9134 else
e8f142e2 9135 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9136
9137 gcc_assert (ncopies >= 1);
42fd8198
IE
9138 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9139 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9140 && reduc_def))
9141 return false;
9142
9143 if (STMT_VINFO_LIVE_P (stmt_info))
9144 {
9145 if (dump_enabled_p ())
9146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9147 "value used after loop.\n");
9148 return false;
9149 }
9150
9151 if (!is_gimple_assign (stmt))
9152 return false;
9153
9154 code = gimple_assign_rhs_code (stmt);
9155
9156 if (TREE_CODE_CLASS (code) != tcc_comparison)
9157 return false;
9158
9159 rhs1 = gimple_assign_rhs1 (stmt);
9160 rhs2 = gimple_assign_rhs2 (stmt);
9161
9162 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9163 &dts[0], &vectype1))
9164 return false;
9165
9166 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9167 &dts[1], &vectype2))
9168 return false;
9169
9170 if (vectype1 && vectype2
928686b1
RS
9171 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9172 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9173 return false;
9174
9175 vectype = vectype1 ? vectype1 : vectype2;
9176
9177 /* Invariant comparison. */
9178 if (!vectype)
9179 {
69a9a66f 9180 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9181 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9182 return false;
9183 }
928686b1 9184 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9185 return false;
9186
49e76ff1
IE
9187 /* Can't compare mask and non-mask types. */
9188 if (vectype1 && vectype2
9189 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9190 return false;
9191
9192 /* Boolean values may have another representation in vectors
9193 and therefore we prefer bit operations over comparison for
9194 them (which also works for scalar masks). We store opcodes
9195 to use in bitop1 and bitop2. Statement is vectorized as
9196 BITOP2 (rhs1 BITOP1 rhs2) or
9197 rhs1 BITOP2 (BITOP1 rhs2)
9198 depending on bitop1 and bitop2 arity. */
9199 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9200 {
9201 if (code == GT_EXPR)
9202 {
9203 bitop1 = BIT_NOT_EXPR;
9204 bitop2 = BIT_AND_EXPR;
9205 }
9206 else if (code == GE_EXPR)
9207 {
9208 bitop1 = BIT_NOT_EXPR;
9209 bitop2 = BIT_IOR_EXPR;
9210 }
9211 else if (code == LT_EXPR)
9212 {
9213 bitop1 = BIT_NOT_EXPR;
9214 bitop2 = BIT_AND_EXPR;
9215 std::swap (rhs1, rhs2);
264d951a 9216 std::swap (dts[0], dts[1]);
49e76ff1
IE
9217 }
9218 else if (code == LE_EXPR)
9219 {
9220 bitop1 = BIT_NOT_EXPR;
9221 bitop2 = BIT_IOR_EXPR;
9222 std::swap (rhs1, rhs2);
264d951a 9223 std::swap (dts[0], dts[1]);
49e76ff1
IE
9224 }
9225 else
9226 {
9227 bitop1 = BIT_XOR_EXPR;
9228 if (code == EQ_EXPR)
9229 bitop2 = BIT_NOT_EXPR;
9230 }
9231 }
9232
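  /* The common (non-mask) case simply emits a vector comparison that
     produces the mask type directly, e.g. (sketch):

       mask__1 = vect_rhs1_2 > vect_rhs2_3;

     while for mask operands the bitop1/bitop2 pairs above rewrite the
     comparison into bit operations, exactly as in
     vectorizable_condition.  */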
42fd8198
IE
9233 if (!vec_stmt)
9234 {
49e76ff1 9235 if (bitop1 == NOP_EXPR)
68435eb2
RB
9236 {
9237 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9238 return false;
9239 }
49e76ff1
IE
9240 else
9241 {
9242 machine_mode mode = TYPE_MODE (vectype);
9243 optab optab;
9244
9245 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9246 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9247 return false;
9248
9249 if (bitop2 != NOP_EXPR)
9250 {
9251 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9252 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9253 return false;
9254 }
49e76ff1 9255 }
68435eb2
RB
9256
9257 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9258 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9259 dts, ndts, slp_node, cost_vec);
9260 return true;
42fd8198
IE
9261 }
9262
9263 /* Transform. */
9264 if (!slp_node)
9265 {
9266 vec_oprnds0.create (1);
9267 vec_oprnds1.create (1);
9268 }
9269
9270 /* Handle def. */
9271 lhs = gimple_assign_lhs (stmt);
9272 mask = vect_create_destination_var (lhs, mask_type);
9273
9274 /* Handle cmp expr. */
9275 for (j = 0; j < ncopies; j++)
9276 {
9277 gassign *new_stmt = NULL;
9278 if (j == 0)
9279 {
9280 if (slp_node)
9281 {
9282 auto_vec<tree, 2> ops;
9283 auto_vec<vec<tree>, 2> vec_defs;
9284
9285 ops.safe_push (rhs1);
9286 ops.safe_push (rhs2);
306b0c92 9287 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9288 vec_oprnds1 = vec_defs.pop ();
9289 vec_oprnds0 = vec_defs.pop ();
9290 }
9291 else
9292 {
e4af0bc4
IE
9293 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9294 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9295 }
9296 }
9297 else
9298 {
9299 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9300 vec_oprnds0.pop ());
9301 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9302 vec_oprnds1.pop ());
9303 }
9304
9305 if (!slp_node)
9306 {
9307 vec_oprnds0.quick_push (vec_rhs1);
9308 vec_oprnds1.quick_push (vec_rhs2);
9309 }
9310
9311 /* Arguments are ready. Create the new vector stmt. */
9312 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9313 {
9314 vec_rhs2 = vec_oprnds1[i];
9315
9316 new_temp = make_ssa_name (mask);
49e76ff1
IE
9317 if (bitop1 == NOP_EXPR)
9318 {
9319 new_stmt = gimple_build_assign (new_temp, code,
9320 vec_rhs1, vec_rhs2);
9321 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9322 }
9323 else
9324 {
9325 if (bitop1 == BIT_NOT_EXPR)
9326 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9327 else
9328 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9329 vec_rhs2);
9330 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9331 if (bitop2 != NOP_EXPR)
9332 {
9333 tree res = make_ssa_name (mask);
9334 if (bitop2 == BIT_NOT_EXPR)
9335 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9336 else
9337 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9338 new_temp);
9339 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9340 }
9341 }
42fd8198
IE
9342 if (slp_node)
9343 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9344 }
9345
9346 if (slp_node)
9347 continue;
9348
9349 if (j == 0)
9350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9351 else
9352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9353
9354 prev_stmt_info = vinfo_for_stmt (new_stmt);
9355 }
9356
9357 vec_oprnds0.release ();
9358 vec_oprnds1.release ();
9359
9360 return true;
9361 }
ebfd146a 9362
68a0f2ff
RS
9363 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9364 can handle all live statements in the node. Otherwise return true
9365 if STMT is not live or if vectorizable_live_operation can handle it.
9366 GSI and VEC_STMT are as for vectorizable_live_operation. */
9367
9368 static bool
9369 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
9370 slp_tree slp_node, gimple **vec_stmt,
9371 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9372 {
9373 if (slp_node)
9374 {
9375 gimple *slp_stmt;
9376 unsigned int i;
9377 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9378 {
9379 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9380 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9381 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
68435eb2 9382 vec_stmt, cost_vec))
68a0f2ff
RS
9383 return false;
9384 }
9385 }
9386 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9387 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9388 cost_vec))
68a0f2ff
RS
9389 return false;
9390
9391 return true;
9392 }
9393
8644a673 9394 /* Make sure the statement is vectorizable.  */
ebfd146a
IR
9395
9396 bool
891ad31c 9397vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9398 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9399 {
8644a673 9400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9401 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9402 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9403 bool ok;
355fe088 9404 gimple *pattern_stmt;
363477c0 9405 gimple_seq pattern_def_seq;
ebfd146a 9406
73fbfcad 9407 if (dump_enabled_p ())
ebfd146a 9408 {
78c60e3d
SS
9409 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9410 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9411 }
ebfd146a 9412
1825a1f3 9413 if (gimple_has_volatile_ops (stmt))
b8698a0f 9414 {
73fbfcad 9415 if (dump_enabled_p ())
78c60e3d 9416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9417 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9418
9419 return false;
9420 }
b8698a0f
L
9421
9422 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9423 to include:
9424 - the COND_EXPR which is the loop exit condition
9425 - any LABEL_EXPRs in the loop
b8698a0f 9426 - computations that are used only for array indexing or loop control.
8644a673 9427 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9428 instance, therefore, all the statements are relevant.
ebfd146a 9429
d092494c 9430 Pattern statement needs to be analyzed instead of the original statement
83197f37 9431 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9432 statements. In basic blocks we are called from some SLP instance
9433 traversal, don't analyze pattern stmts instead, the pattern stmts
9434 already will be part of SLP instance. */
83197f37
IR
9435
9436 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9437 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9438 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9439 {
9d5e7640 9440 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9441 && pattern_stmt
9d5e7640
IR
9442 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9443 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9444 {
83197f37 9445 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9446 stmt = pattern_stmt;
9447 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9448 if (dump_enabled_p ())
9d5e7640 9449 {
78c60e3d
SS
9450 dump_printf_loc (MSG_NOTE, vect_location,
9451 "==> examining pattern statement: ");
9452 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9453 }
9454 }
9455 else
9456 {
73fbfcad 9457 if (dump_enabled_p ())
e645e942 9458 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9459
9d5e7640
IR
9460 return true;
9461 }
8644a673 9462 }
83197f37 9463 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9464 && node == NULL
83197f37
IR
9465 && pattern_stmt
9466 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9467 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9468 {
9469 /* Analyze PATTERN_STMT too. */
73fbfcad 9470 if (dump_enabled_p ())
83197f37 9471 {
78c60e3d
SS
9472 dump_printf_loc (MSG_NOTE, vect_location,
9473 "==> examining pattern statement: ");
9474 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9475 }
9476
891ad31c 9477 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
68435eb2 9478 node_instance, cost_vec))
83197f37
IR
9479 return false;
9480 }
ebfd146a 9481
1107f3ae 9482 if (is_pattern_stmt_p (stmt_info)
079c527f 9483 && node == NULL
363477c0 9484 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 9485 {
363477c0 9486 gimple_stmt_iterator si;
1107f3ae 9487
363477c0
JJ
9488 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9489 {
355fe088 9490 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
9491 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9492 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9493 {
9494 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 9495 if (dump_enabled_p ())
363477c0 9496 {
78c60e3d
SS
9497 dump_printf_loc (MSG_NOTE, vect_location,
9498 "==> examining pattern def statement: ");
9499 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 9500 }
1107f3ae 9501
363477c0 9502 if (!vect_analyze_stmt (pattern_def_stmt,
68435eb2
RB
9503 need_to_vectorize, node, node_instance,
9504 cost_vec))
363477c0
JJ
9505 return false;
9506 }
9507 }
9508 }
1107f3ae 9509
8644a673
IR
9510 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9511 {
9512 case vect_internal_def:
9513 break;
ebfd146a 9514
8644a673 9515 case vect_reduction_def:
7c5222ff 9516 case vect_nested_cycle:
14a61437
RB
9517 gcc_assert (!bb_vinfo
9518 && (relevance == vect_used_in_outer
9519 || relevance == vect_used_in_outer_by_reduction
9520 || relevance == vect_used_by_reduction
b28ead45
AH
9521 || relevance == vect_unused_in_scope
9522 || relevance == vect_used_only_live));
8644a673
IR
9523 break;
9524
9525 case vect_induction_def:
e7baeb39
RB
9526 gcc_assert (!bb_vinfo);
9527 break;
9528
8644a673
IR
9529 case vect_constant_def:
9530 case vect_external_def:
9531 case vect_unknown_def_type:
9532 default:
9533 gcc_unreachable ();
9534 }
ebfd146a 9535
8644a673 9536 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9537 {
8644a673 9538 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9539 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9540 || (is_gimple_call (stmt)
9541 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9542 *need_to_vectorize = true;
ebfd146a
IR
9543 }
9544
b1af7da6
RB
9545 if (PURE_SLP_STMT (stmt_info) && !node)
9546 {
9547 dump_printf_loc (MSG_NOTE, vect_location,
9548 "handled only by SLP analysis\n");
9549 return true;
9550 }
9551
9552 ok = true;
9553 if (!bb_vinfo
9554 && (STMT_VINFO_RELEVANT_P (stmt_info)
9555 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2
RB
9556 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9557 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9558 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9559 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9560 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9561 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9562 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9563 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9564 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9565 cost_vec)
9566 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9567 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9568 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6
RB
9569 else
9570 {
9571 if (bb_vinfo)
68435eb2
RB
9572 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9573 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9574 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9575 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9576 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9577 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9578 cost_vec)
9579 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9580 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9581 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9582 cost_vec)
9583 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9584 cost_vec));
b1af7da6 9585 }
8644a673
IR
9586
9587 if (!ok)
ebfd146a 9588 {
73fbfcad 9589 if (dump_enabled_p ())
8644a673 9590 {
78c60e3d
SS
9591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9592 "not vectorized: relevant stmt not ");
9593 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9594 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9595 }
b8698a0f 9596
ebfd146a
IR
9597 return false;
9598 }
9599
8644a673
IR
9600 /* Stmts that are (also) "live" (i.e., used outside of the loop)
9601 need extra handling, except for vectorizable reductions. */
68435eb2
RB
9602 if (!bb_vinfo
9603 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9604 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9605 {
73fbfcad 9606 if (dump_enabled_p ())
8644a673 9607 {
78c60e3d 9608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9609 "not vectorized: live stmt not supported: ");
78c60e3d 9610 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9611 }
b8698a0f 9612
8644a673 9613 return false;
ebfd146a
IR
9614 }
9615
ebfd146a
IR
9616 return true;
9617}
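
/* Editor's illustration (not part of the original sources): a typical
   pattern statement handled by the analysis above comes from pattern
   recognition of a widening multiply.  For scalar code such as

     short a, b;
     int c = (int) a * (int) b;

   the recognizer may record c_pat = WIDEN_MULT_EXPR <a, b> as the
   related pattern stmt, and vect_analyze_stmt then examines that
   pattern stmt whenever the original statement is not relevant.  */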
9618
9619
9620/* Function vect_transform_stmt.
9621
9622 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9623
9624bool
355fe088 9625vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9626 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9627 slp_instance slp_node_instance)
9628{
9629 bool is_store = false;
355fe088 9630 gimple *vec_stmt = NULL;
ebfd146a 9631 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9632 bool done;
ebfd146a 9633
fce57248 9634 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9635 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9636
e57d9a82
RB
9637 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9638 && nested_in_vect_loop_p
9639 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9640 stmt));
9641
ebfd146a
IR
9642 switch (STMT_VINFO_TYPE (stmt_info))
9643 {
9644 case type_demotion_vec_info_type:
ebfd146a 9645 case type_promotion_vec_info_type:
ebfd146a 9646 case type_conversion_vec_info_type:
68435eb2 9647 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9648 gcc_assert (done);
9649 break;
9650
9651 case induc_vec_info_type:
68435eb2 9652 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9653 gcc_assert (done);
9654 break;
9655
9dc3f7de 9656 case shift_vec_info_type:
68435eb2 9657 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de
IR
9658 gcc_assert (done);
9659 break;
9660
ebfd146a 9661 case op_vec_info_type:
68435eb2 9662 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9663 gcc_assert (done);
9664 break;
9665
9666 case assignment_vec_info_type:
68435eb2 9667 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9668 gcc_assert (done);
9669 break;
9670
9671 case load_vec_info_type:
b8698a0f 9672 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9673 slp_node_instance, NULL);
ebfd146a
IR
9674 gcc_assert (done);
9675 break;
9676
9677 case store_vec_info_type:
68435eb2 9678 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9679 gcc_assert (done);
0d0293ac 9680 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9681 {
9682 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9683 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
9684 one are skipped, and their vec_stmt_info shouldn't be freed
9685 meanwhile. */
0d0293ac 9686 *grouped_store = true;
f307441a
RS
9687 stmt_vec_info group_info
9688 = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9689 if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
ebfd146a 9690 is_store = true;
f307441a 9691 }
ebfd146a
IR
9692 else
9693 is_store = true;
9694 break;
9695
9696 case condition_vec_info_type:
68435eb2 9697 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a
IR
9698 gcc_assert (done);
9699 break;
9700
42fd8198 9701 case comparison_vec_info_type:
68435eb2 9702 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198
IE
9703 gcc_assert (done);
9704 break;
9705
ebfd146a 9706 case call_vec_info_type:
68435eb2 9707 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9708 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9709 break;
9710
0136f8f0 9711 case call_simd_clone_vec_info_type:
68435eb2 9712 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0
AH
9713 stmt = gsi_stmt (*gsi);
9714 break;
9715
ebfd146a 9716 case reduc_vec_info_type:
891ad31c 9717 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9718 slp_node_instance, NULL);
ebfd146a
IR
9719 gcc_assert (done);
9720 break;
9721
9722 default:
9723 if (!STMT_VINFO_LIVE_P (stmt_info))
9724 {
73fbfcad 9725 if (dump_enabled_p ())
78c60e3d 9726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9727 "stmt not supported.\n");
ebfd146a
IR
9728 gcc_unreachable ();
9729 }
9730 }
9731
225ce44b
RB
9732 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9733 This would break hybrid SLP vectorization. */
9734 if (slp_node)
d90f8440
RB
9735 gcc_assert (!vec_stmt
9736 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9737
ebfd146a
IR
9738 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9739 is being vectorized, but outside the immediately enclosing loop. */
9740 if (vec_stmt
e57d9a82 9741 && nested_p
ebfd146a
IR
9742 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9743 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9744 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9745 vect_used_in_outer_by_reduction))
ebfd146a 9746 {
a70d6342
IR
9747 struct loop *innerloop = LOOP_VINFO_LOOP (
9748 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9749 imm_use_iterator imm_iter;
9750 use_operand_p use_p;
9751 tree scalar_dest;
355fe088 9752 gimple *exit_phi;
ebfd146a 9753
73fbfcad 9754 if (dump_enabled_p ())
78c60e3d 9755 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9756 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9757
9758 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9759 (to be used when vectorizing outer-loop stmts that use the DEF of
9760 STMT). */
9761 if (gimple_code (stmt) == GIMPLE_PHI)
9762 scalar_dest = PHI_RESULT (stmt);
9763 else
9764 scalar_dest = gimple_assign_lhs (stmt);
9765
9766 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9767 {
9768 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9769 {
9770 exit_phi = USE_STMT (use_p);
9771 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9772 }
9773 }
9774 }
9775
9776 /* Handle stmts whose DEF is used outside the loop-nest that is
9777 being vectorized. */
68a0f2ff 9778 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9779 {
68435eb2 9780 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a
IR
9781 gcc_assert (done);
9782 }
9783
9784 if (vec_stmt)
83197f37 9785 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9786
b8698a0f 9787 return is_store;
ebfd146a
IR
9788}
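
/* Editor's illustration (assumed example, not from the sources): for an
   interleaved store group such as

     for (i = 0; i < n; i++)
       { a[2*i] = x[i];  a[2*i+1] = y[i]; }

   vect_transform_stmt only marks *GROUPED_STORE for the earlier group
   members; the vector stores and the interleaving permutation are
   emitted once GROUP_STORE_COUNT reaches GROUP_SIZE at the last store,
   at which point is_store is returned as true.  */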
9789
9790
b8698a0f 9791/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9792 stmt_vec_info. */
9793
9794void
355fe088 9795vect_remove_stores (gimple *first_stmt)
ebfd146a 9796{
355fe088
TS
9797 gimple *next = first_stmt;
9798 gimple *tmp;
ebfd146a
IR
9799 gimple_stmt_iterator next_si;
9800
9801 while (next)
9802 {
78048b1c
JJ
9803 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9804
9805 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9806 if (is_pattern_stmt_p (stmt_info))
9807 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9808 /* Free the attached stmt_vec_info and remove the stmt. */
9809 next_si = gsi_for_stmt (next);
3d3f2249 9810 unlink_stmt_vdef (next);
ebfd146a 9811 gsi_remove (&next_si, true);
3d3f2249 9812 release_defs (next);
ebfd146a
IR
9813 free_stmt_vec_info (next);
9814 next = tmp;
9815 }
9816}
9817
9818
9819/* Function new_stmt_vec_info.
9820
9821 Create and initialize a new stmt_vec_info struct for STMT. */
9822
9823stmt_vec_info
310213d4 9824new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9825{
9826 stmt_vec_info res;
9827 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9828
9829 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9830 STMT_VINFO_STMT (res) = stmt;
310213d4 9831 res->vinfo = vinfo;
8644a673 9832 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9833 STMT_VINFO_LIVE_P (res) = false;
9834 STMT_VINFO_VECTYPE (res) = NULL;
9835 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9836 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9837 STMT_VINFO_IN_PATTERN_P (res) = false;
9838 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9839 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9840 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9841 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9842 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9843
ebfd146a
IR
9844 if (gimple_code (stmt) == GIMPLE_PHI
9845 && is_loop_header_bb_p (gimple_bb (stmt)))
9846 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9847 else
8644a673
IR
9848 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9849
9771b263 9850 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9851 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9852 STMT_VINFO_NUM_SLP_USES (res) = 0;
9853
e14c1050
IR
9854 GROUP_FIRST_ELEMENT (res) = NULL;
9855 GROUP_NEXT_ELEMENT (res) = NULL;
9856 GROUP_SIZE (res) = 0;
9857 GROUP_STORE_COUNT (res) = 0;
9858 GROUP_GAP (res) = 0;
9859 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
9860
9861 return res;
9862}
9863
9864
9865/* Create a hash table for stmt_vec_info. */
9866
9867void
9868init_stmt_vec_info_vec (void)
9869{
9771b263
DN
9870 gcc_assert (!stmt_vec_info_vec.exists ());
9871 stmt_vec_info_vec.create (50);
ebfd146a
IR
9872}
9873
9874
9875/* Free hash table for stmt_vec_info. */
9876
9877void
9878free_stmt_vec_info_vec (void)
9879{
93675444 9880 unsigned int i;
3161455c 9881 stmt_vec_info info;
93675444
JJ
9882 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9883 if (info != NULL)
3161455c 9884 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
9885 gcc_assert (stmt_vec_info_vec.exists ());
9886 stmt_vec_info_vec.release ();
ebfd146a
IR
9887}
9888
9889
9890/* Free stmt vectorization related info. */
9891
9892void
355fe088 9893free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9894{
9895 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9896
9897 if (!stmt_info)
9898 return;
9899
78048b1c
JJ
9900 /* Check if this statement has a related "pattern stmt"
9901 (introduced by the vectorizer during the pattern recognition
9902 pass). Free the pattern's stmt_vec_info and def stmt's stmt_vec_info
9903 too. */
9904 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9905 {
9906 stmt_vec_info patt_info
9907 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9908 if (patt_info)
9909 {
363477c0 9910 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9911 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9912 gimple_set_bb (patt_stmt, NULL);
9913 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9914 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9915 release_ssa_name (lhs);
363477c0
JJ
9916 if (seq)
9917 {
9918 gimple_stmt_iterator si;
9919 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9920 {
355fe088 9921 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9922 gimple_set_bb (seq_stmt, NULL);
7532abf2 9923 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9924 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9925 release_ssa_name (lhs);
9926 free_stmt_vec_info (seq_stmt);
9927 }
363477c0 9928 }
f0281fde 9929 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9930 }
9931 }
9932
9771b263 9933 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9934 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9935 set_vinfo_for_stmt (stmt, NULL);
9936 free (stmt_info);
9937}
9938
9939
bb67d9c7 9940/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9941
bb67d9c7 9942 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9943 by the target. */
9944
c803b2a9 9945tree
86e36728 9946get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9947{
c7d97b28 9948 tree orig_scalar_type = scalar_type;
3bd8f481 9949 scalar_mode inner_mode;
ef4bddc2 9950 machine_mode simd_mode;
86e36728 9951 poly_uint64 nunits;
ebfd146a
IR
9952 tree vectype;
9953
3bd8f481
RS
9954 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9955 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9956 return NULL_TREE;
9957
3bd8f481 9958 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9959
7b7b1813
RG
9960 /* For vector types of elements whose mode precision doesn't
9961 match their type's precision we use an element type of mode
9962 precision. The vectorization routines will have to make sure
48f2e373
RB
9963 they support the proper result truncation/extension.
9964 We also make sure to build vector types with INTEGER_TYPE
9965 component type only. */
6d7971b8 9966 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9967 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9968 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9969 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9970 TYPE_UNSIGNED (scalar_type));
6d7971b8 9971
ccbf5bb4
RG
9972 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9973 When the component mode passes the above test simply use a type
9974 corresponding to that mode. The theory is that any use that
9975 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9976 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9977 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9978 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9979
9980 /* We can't build a vector type of elements with alignment bigger than
9981 their size. */
dfc2e2ac 9982 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9983 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9984 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9985
dfc2e2ac
RB
9986 /* If we fell back to using the mode, fail if there was
9987 no scalar type for it. */
9988 if (scalar_type == NULL_TREE)
9989 return NULL_TREE;
9990
bb67d9c7
RG
9991 /* If no size was supplied use the mode the target prefers. Otherwise
9992 look up a vector mode of the specified size. */
86e36728 9993 if (known_eq (size, 0U))
bb67d9c7 9994 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9995 else if (!multiple_p (size, nbytes, &nunits)
9996 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9997 return NULL_TREE;
4c8fd8ac 9998 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9999 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 10000 return NULL_TREE;
ebfd146a
IR
10001
10002 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
10003
10004 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10005 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 10006 return NULL_TREE;
ebfd146a 10007
c7d97b28
RB
10008 /* Re-attach the address-space qualifier if we canonicalized the scalar
10009 type. */
10010 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10011 return build_qualified_type
10012 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10013
ebfd146a
IR
10014 return vectype;
10015}
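
/* Editor's example (target-dependent, for illustration only): on a
   target whose preferred SIMD mode for SImode is V4SImode, calling

     get_vectype_for_scalar_type_and_size (integer_type_node, 0)

   yields "vector(4) int", while an explicit SIZE of 32 bytes on an
   AVX2-style target would instead select V8SImode and yield
   "vector(8) int".  */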
10016
86e36728 10017poly_uint64 current_vector_size;
bb67d9c7
RG
10018
10019/* Function get_vectype_for_scalar_type.
10020
10021 Returns the vector type corresponding to SCALAR_TYPE as supported
10022 by the target. */
10023
10024tree
10025get_vectype_for_scalar_type (tree scalar_type)
10026{
10027 tree vectype;
10028 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10029 current_vector_size);
10030 if (vectype
86e36728 10031 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
10032 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10033 return vectype;
10034}
10035
42fd8198
IE
10036/* Function get_mask_type_for_scalar_type.
10037
10038 Returns the mask type corresponding to a result of comparison
10039 of vectors of specified SCALAR_TYPE as supported by target. */
10040
10041tree
10042get_mask_type_for_scalar_type (tree scalar_type)
10043{
10044 tree vectype = get_vectype_for_scalar_type (scalar_type);
10045
10046 if (!vectype)
10047 return NULL;
10048
10049 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10050 current_vector_size);
10051}
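
/* Editor's example (illustrative assumption): with a current vector
   size of 16 bytes and SCALAR_TYPE "int", the vectype has 4 lanes, so
   the returned mask type is the 4-element truth vector -- e.g. a
   "vector(4) <signed-boolean:32>" style type on targets without
   dedicated mask registers.  */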
10052
b690cc0f
RG
10053/* Function get_same_sized_vectype
10054
10055 Returns a vector type corresponding to SCALAR_TYPE with the same size
10056 as VECTOR_TYPE, if supported by the target. */
10057
10058tree
bb67d9c7 10059get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10060{
2568d8a1 10061 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10062 return build_same_sized_truth_vector_type (vector_type);
10063
bb67d9c7
RG
10064 return get_vectype_for_scalar_type_and_size
10065 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10066}
10067
ebfd146a
IR
10068/* Function vect_is_simple_use.
10069
10070 Input:
81c40241
RB
10071 VINFO - the vect info of the loop or basic block that is being vectorized.
10072 OPERAND - operand in the loop or bb.
10073 Output:
10074 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
10075 DT - the type of definition
ebfd146a
IR
10076
10077 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10078 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10079 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10080 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
10081 is the case in reduction/induction computations).
10082 For basic blocks, supportable operands are constants and bb invariants.
10083 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
10084
10085bool
81c40241
RB
10086vect_is_simple_use (tree operand, vec_info *vinfo,
10087 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 10088{
ebfd146a 10089 *def_stmt = NULL;
3fc356dc 10090 *dt = vect_unknown_def_type;
b8698a0f 10091
73fbfcad 10092 if (dump_enabled_p ())
ebfd146a 10093 {
78c60e3d
SS
10094 dump_printf_loc (MSG_NOTE, vect_location,
10095 "vect_is_simple_use: operand ");
10096 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 10097 dump_printf (MSG_NOTE, "\n");
ebfd146a 10098 }
b8698a0f 10099
b758f602 10100 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
10101 {
10102 *dt = vect_constant_def;
10103 return true;
10104 }
b8698a0f 10105
ebfd146a
IR
10106 if (is_gimple_min_invariant (operand))
10107 {
8644a673 10108 *dt = vect_external_def;
ebfd146a
IR
10109 return true;
10110 }
10111
ebfd146a
IR
10112 if (TREE_CODE (operand) != SSA_NAME)
10113 {
73fbfcad 10114 if (dump_enabled_p ())
af29617a
AH
10115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10116 "not ssa-name.\n");
ebfd146a
IR
10117 return false;
10118 }
b8698a0f 10119
3fc356dc 10120 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 10121 {
3fc356dc
RB
10122 *dt = vect_external_def;
10123 return true;
ebfd146a
IR
10124 }
10125
3fc356dc 10126 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 10127 if (dump_enabled_p ())
ebfd146a 10128 {
78c60e3d
SS
10129 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
10130 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
10131 }
10132
61d371eb 10133 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 10134 *dt = vect_external_def;
ebfd146a
IR
10135 else
10136 {
3fc356dc 10137 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 10138 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
10139 }
10140
2e8ab70c
RB
10141 if (dump_enabled_p ())
10142 {
10143 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10144 switch (*dt)
10145 {
10146 case vect_uninitialized_def:
10147 dump_printf (MSG_NOTE, "uninitialized\n");
10148 break;
10149 case vect_constant_def:
10150 dump_printf (MSG_NOTE, "constant\n");
10151 break;
10152 case vect_external_def:
10153 dump_printf (MSG_NOTE, "external\n");
10154 break;
10155 case vect_internal_def:
10156 dump_printf (MSG_NOTE, "internal\n");
10157 break;
10158 case vect_induction_def:
10159 dump_printf (MSG_NOTE, "induction\n");
10160 break;
10161 case vect_reduction_def:
10162 dump_printf (MSG_NOTE, "reduction\n");
10163 break;
10164 case vect_double_reduction_def:
10165 dump_printf (MSG_NOTE, "double reduction\n");
10166 break;
10167 case vect_nested_cycle:
10168 dump_printf (MSG_NOTE, "nested cycle\n");
10169 break;
10170 case vect_unknown_def_type:
10171 dump_printf (MSG_NOTE, "unknown\n");
10172 break;
10173 }
10174 }
10175
81c40241 10176 if (*dt == vect_unknown_def_type)
ebfd146a 10177 {
73fbfcad 10178 if (dump_enabled_p ())
78c60e3d 10179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10180 "Unsupported pattern.\n");
ebfd146a
IR
10181 return false;
10182 }
10183
ebfd146a
IR
10184 switch (gimple_code (*def_stmt))
10185 {
10186 case GIMPLE_PHI:
ebfd146a 10187 case GIMPLE_ASSIGN:
ebfd146a 10188 case GIMPLE_CALL:
81c40241 10189 break;
ebfd146a 10190 default:
73fbfcad 10191 if (dump_enabled_p ())
78c60e3d 10192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10193 "unsupported defining stmt:\n");
ebfd146a
IR
10194 return false;
10195 }
10196
10197 return true;
10198}
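
/* Editor's example (for exposition): for a use of x_1 defined by
   "x_1 = PHI <x_2(latch), 0(preheader)>" in a loop header, *DT is
   taken from the PHI's stmt_vec_info (e.g. vect_induction_def); a use
   of a constant yields vect_constant_def, and a use of a function
   parameter (a default definition) yields vect_external_def with
   *DEF_STMT left NULL.  */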
10199
81c40241 10200/* Function vect_is_simple_use.
b690cc0f 10201
81c40241 10202 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10203 type of OPERAND and stores it to *VECTYPE. If the definition of
10204 OPERAND is vect_uninitialized_def, vect_constant_def or
10205 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10206 is responsible to compute the best suited vector type for the
10207 scalar operand. */
10208
10209bool
81c40241
RB
10210vect_is_simple_use (tree operand, vec_info *vinfo,
10211 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 10212{
81c40241 10213 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
10214 return false;
10215
10216 /* Now get a vector type if the def is internal, otherwise supply
10217 NULL_TREE and leave it up to the caller to figure out a proper
10218 type for the use stmt. */
10219 if (*dt == vect_internal_def
10220 || *dt == vect_induction_def
10221 || *dt == vect_reduction_def
10222 || *dt == vect_double_reduction_def
10223 || *dt == vect_nested_cycle)
10224 {
10225 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
10226
10227 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10228 && !STMT_VINFO_RELEVANT (stmt_info)
10229 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 10230 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 10231
b690cc0f
RG
10232 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10233 gcc_assert (*vectype != NULL_TREE);
10234 }
10235 else if (*dt == vect_uninitialized_def
10236 || *dt == vect_constant_def
10237 || *dt == vect_external_def)
10238 *vectype = NULL_TREE;
10239 else
10240 gcc_unreachable ();
10241
10242 return true;
10243}
10244
ebfd146a
IR
10245
10246/* Function supportable_widening_operation
10247
b8698a0f
L
10248 Check whether an operation represented by the code CODE is a
10249 widening operation that is supported by the target platform in
b690cc0f
RG
10250 vector form (i.e., when operating on arguments of type VECTYPE_IN
10251 producing a result of type VECTYPE_OUT).
b8698a0f 10252
ebfd146a
IR
10253 Widening operations we currently support are NOP (CONVERT), FLOAT
10254 and WIDEN_MULT. This function checks if these operations are supported
10255 by the target platform either directly (via vector tree-codes), or via
10256 target builtins.
10257
10258 Output:
b8698a0f
L
10259 - CODE1 and CODE2 are codes of vector operations to be used when
10260 vectorizing the operation, if available.
ebfd146a
IR
10261 - MULTI_STEP_CVT determines the number of required intermediate steps in
10262 case of multi-step conversion (like char->short->int - in that case
10263 MULTI_STEP_CVT will be 1).
b8698a0f
L
10264 - INTERM_TYPES contains the intermediate type required to perform the
10265 widening operation (short in the above example). */
ebfd146a
IR
10266
10267bool
355fe088 10268supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10269 tree vectype_out, tree vectype_in,
ebfd146a
IR
10270 enum tree_code *code1, enum tree_code *code2,
10271 int *multi_step_cvt,
9771b263 10272 vec<tree> *interm_types)
ebfd146a
IR
10273{
10274 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10275 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10276 struct loop *vect_loop = NULL;
ef4bddc2 10277 machine_mode vec_mode;
81f40b79 10278 enum insn_code icode1, icode2;
ebfd146a 10279 optab optab1, optab2;
b690cc0f
RG
10280 tree vectype = vectype_in;
10281 tree wide_vectype = vectype_out;
ebfd146a 10282 enum tree_code c1, c2;
4a00c761
JJ
10283 int i;
10284 tree prev_type, intermediate_type;
ef4bddc2 10285 machine_mode intermediate_mode, prev_mode;
4a00c761 10286 optab optab3, optab4;
ebfd146a 10287
4a00c761 10288 *multi_step_cvt = 0;
4ef69dfc
IR
10289 if (loop_info)
10290 vect_loop = LOOP_VINFO_LOOP (loop_info);
10291
ebfd146a
IR
10292 switch (code)
10293 {
10294 case WIDEN_MULT_EXPR:
6ae6116f
RH
10295 /* The result of a vectorized widening operation usually requires
10296 two vectors (because the widened results do not fit into one vector).
10297 The generated vector results would normally be expected to be
10298 generated in the same order as in the original scalar computation,
10299 i.e. if 8 results are generated in each vector iteration, they are
10300 to be organized as follows:
10301 vect1: [res1,res2,res3,res4],
10302 vect2: [res5,res6,res7,res8].
10303
10304 However, in the special case that the result of the widening
10305 operation is used in a reduction computation only, the order doesn't
10306 matter (because when vectorizing a reduction we change the order of
10307 the computation). Some targets can take advantage of this and
10308 generate more efficient code. For example, targets like Altivec,
10309 that support widen_mult using a sequence of {mult_even,mult_odd}
10310 generate the following vectors:
10311 vect1: [res1,res3,res5,res7],
10312 vect2: [res2,res4,res6,res8].
10313
10314 When vectorizing outer-loops, we execute the inner-loop sequentially
10315 (each vectorized inner-loop iteration contributes to VF outer-loop
10316 iterations in parallel). We therefore don't allow to change the
10317 order of the computation in the inner-loop during outer-loop
10318 vectorization. */
10319 /* TODO: Another case in which order doesn't *really* matter is when we
10320 widen and then contract again, e.g. (short)((int)x * y >> 8).
10321 Normally, pack_trunc performs an even/odd permute, whereas the
10322 repack from an even/odd expansion would be an interleave, which
10323 would be significantly simpler for e.g. AVX2. */
10324 /* In any case, in order to avoid duplicating the code below, recurse
10325 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10326 are properly set up for the caller. If we fail, we'll continue with
10327 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10328 if (vect_loop
10329 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10330 && !nested_in_vect_loop_p (vect_loop, stmt)
10331 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10332 stmt, vectype_out, vectype_in,
a86ec597
RH
10333 code1, code2, multi_step_cvt,
10334 interm_types))
ebc047a2
CH
10335 {
10336 /* Elements in a vector with vect_used_by_reduction property cannot
10337 be reordered if the use chain with this property does not have the
10338 same operation. One such example is s += a * b, where elements
10339 in a and b cannot be reordered. Here we check if the vector defined
10340 by STMT is only directly used in the reduction statement. */
10341 tree lhs = gimple_assign_lhs (stmt);
10342 use_operand_p dummy;
355fe088 10343 gimple *use_stmt;
ebc047a2
CH
10344 stmt_vec_info use_stmt_info = NULL;
10345 if (single_imm_use (lhs, &dummy, &use_stmt)
10346 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10347 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10348 return true;
10349 }
4a00c761
JJ
10350 c1 = VEC_WIDEN_MULT_LO_EXPR;
10351 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10352 break;
10353
81c40241
RB
10354 case DOT_PROD_EXPR:
10355 c1 = DOT_PROD_EXPR;
10356 c2 = DOT_PROD_EXPR;
10357 break;
10358
10359 case SAD_EXPR:
10360 c1 = SAD_EXPR;
10361 c2 = SAD_EXPR;
10362 break;
10363
6ae6116f
RH
10364 case VEC_WIDEN_MULT_EVEN_EXPR:
10365 /* Support the recursion induced just above. */
10366 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10367 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10368 break;
10369
36ba4aae 10370 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10371 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10372 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10373 break;
10374
ebfd146a 10375 CASE_CONVERT:
4a00c761
JJ
10376 c1 = VEC_UNPACK_LO_EXPR;
10377 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10378 break;
10379
10380 case FLOAT_EXPR:
4a00c761
JJ
10381 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10382 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10383 break;
10384
10385 case FIX_TRUNC_EXPR:
10386 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10387 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10388 computing the operation. */
10389 return false;
10390
10391 default:
10392 gcc_unreachable ();
10393 }
10394
6ae6116f 10395 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10396 std::swap (c1, c2);
4a00c761 10397
ebfd146a
IR
10398 if (code == FIX_TRUNC_EXPR)
10399 {
10400 /* The signedness is determined from output operand. */
b690cc0f
RG
10401 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10402 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10403 }
10404 else
10405 {
10406 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10407 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10408 }
10409
10410 if (!optab1 || !optab2)
10411 return false;
10412
10413 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10414 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10415 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10416 return false;
10417
4a00c761
JJ
10418 *code1 = c1;
10419 *code2 = c2;
10420
10421 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10422 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10423 /* For scalar masks we may have different boolean
10424 vector types having the same QImode. Thus we
10425 add an additional check on the number of elements. */
10426 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10427 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10428 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10429
b8698a0f 10430 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10431 types. */
ebfd146a 10432
4a00c761
JJ
10433 prev_type = vectype;
10434 prev_mode = vec_mode;
b8698a0f 10435
4a00c761
JJ
10436 if (!CONVERT_EXPR_CODE_P (code))
10437 return false;
b8698a0f 10438
4a00c761
JJ
10439 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10440 intermediate steps in the promotion sequence. We try
10441 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10442 not. */
9771b263 10443 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10444 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10445 {
10446 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10447 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10448 {
7cfb4d93 10449 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10450 if (intermediate_mode != TYPE_MODE (intermediate_type))
10451 return false;
10452 }
10453 else
10454 intermediate_type
10455 = lang_hooks.types.type_for_mode (intermediate_mode,
10456 TYPE_UNSIGNED (prev_type));
10457
4a00c761
JJ
10458 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10459 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10460
10461 if (!optab3 || !optab4
10462 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10463 || insn_data[icode1].operand[0].mode != intermediate_mode
10464 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10465 || insn_data[icode2].operand[0].mode != intermediate_mode
10466 || ((icode1 = optab_handler (optab3, intermediate_mode))
10467 == CODE_FOR_nothing)
10468 || ((icode2 = optab_handler (optab4, intermediate_mode))
10469 == CODE_FOR_nothing))
10470 break;
ebfd146a 10471
9771b263 10472 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10473 (*multi_step_cvt)++;
10474
10475 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10476 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10477 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10478 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10479 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10480
10481 prev_type = intermediate_type;
10482 prev_mode = intermediate_mode;
ebfd146a
IR
10483 }
10484
9771b263 10485 interm_types->release ();
4a00c761 10486 return false;
ebfd146a
IR
10487}
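
/* Editor's worked example (illustrative): converting V16QI to V4SI via
   CASE_CONVERT cannot be done in one step, so the loop above tries the
   intermediate V8HI type: INTERM_TYPES receives the short vector type,
   *MULTI_STEP_CVT becomes 1, and the VEC_UNPACK_LO/HI_EXPR codes are
   returned, letting the caller emit two rounds of unpacking.  */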
10488
10489
10490/* Function supportable_narrowing_operation
10491
b8698a0f
L
10492 Check whether an operation represented by the code CODE is a
10493 narrowing operation that is supported by the target platform in
b690cc0f
RG
10494 vector form (i.e., when operating on arguments of type VECTYPE_IN
10495 and producing a result of type VECTYPE_OUT).
b8698a0f 10496
ebfd146a 10497 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 10498 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
10499 the target platform directly via vector tree-codes.
10500
10501 Output:
b8698a0f
L
10502 - CODE1 is the code of a vector operation to be used when
10503 vectorizing the operation, if available.
ebfd146a
IR
10504 - MULTI_STEP_CVT determines the number of required intermediate steps in
10505 case of multi-step conversion (like int->short->char - in that case
10506 MULTI_STEP_CVT will be 1).
10507 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10508 narrowing operation (short in the above example). */
ebfd146a
IR
10509
10510bool
10511supportable_narrowing_operation (enum tree_code code,
b690cc0f 10512 tree vectype_out, tree vectype_in,
ebfd146a 10513 enum tree_code *code1, int *multi_step_cvt,
9771b263 10514 vec<tree> *interm_types)
ebfd146a 10515{
ef4bddc2 10516 machine_mode vec_mode;
ebfd146a
IR
10517 enum insn_code icode1;
10518 optab optab1, interm_optab;
b690cc0f
RG
10519 tree vectype = vectype_in;
10520 tree narrow_vectype = vectype_out;
ebfd146a 10521 enum tree_code c1;
3ae0661a 10522 tree intermediate_type, prev_type;
ef4bddc2 10523 machine_mode intermediate_mode, prev_mode;
ebfd146a 10524 int i;
4a00c761 10525 bool uns;
ebfd146a 10526
4a00c761 10527 *multi_step_cvt = 0;
ebfd146a
IR
10528 switch (code)
10529 {
10530 CASE_CONVERT:
10531 c1 = VEC_PACK_TRUNC_EXPR;
10532 break;
10533
10534 case FIX_TRUNC_EXPR:
10535 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10536 break;
10537
10538 case FLOAT_EXPR:
10539 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10540 tree code and optabs used for computing the operation. */
10541 return false;
10542
10543 default:
10544 gcc_unreachable ();
10545 }
10546
10547 if (code == FIX_TRUNC_EXPR)
10548 /* The signedness is determined from output operand. */
b690cc0f 10549 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10550 else
10551 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10552
10553 if (!optab1)
10554 return false;
10555
10556 vec_mode = TYPE_MODE (vectype);
947131ba 10557 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10558 return false;
10559
4a00c761
JJ
10560 *code1 = c1;
10561
10562 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10563 /* For scalar masks we may have different boolean
10564 vector types having the same QImode. Thus we
10565 add an additional check on the number of elements. */
10566 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10567 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10568 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10569
ebfd146a
IR
10570 /* Check if it's a multi-step conversion that can be done using intermediate
10571 types. */
4a00c761 10572 prev_mode = vec_mode;
3ae0661a 10573 prev_type = vectype;
4a00c761
JJ
10574 if (code == FIX_TRUNC_EXPR)
10575 uns = TYPE_UNSIGNED (vectype_out);
10576 else
10577 uns = TYPE_UNSIGNED (vectype);
10578
10579 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10580 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10581 costly than signed. */
10582 if (code == FIX_TRUNC_EXPR && uns)
10583 {
10584 enum insn_code icode2;
10585
10586 intermediate_type
10587 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10588 interm_optab
10589 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10590 if (interm_optab != unknown_optab
4a00c761
JJ
10591 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10592 && insn_data[icode1].operand[0].mode
10593 == insn_data[icode2].operand[0].mode)
10594 {
10595 uns = false;
10596 optab1 = interm_optab;
10597 icode1 = icode2;
10598 }
10599 }
ebfd146a 10600
4a00c761
JJ
10601 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10602 intermediate steps in the narrowing sequence. We try
10603 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10604 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10605 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10606 {
10607 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10608 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10609 {
7cfb4d93 10610 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10611 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10612 return false;
3ae0661a
IE
10613 }
10614 else
10615 intermediate_type
10616 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10617 interm_optab
10618 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10619 optab_default);
10620 if (!interm_optab
10621 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10622 || insn_data[icode1].operand[0].mode != intermediate_mode
10623 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10624 == CODE_FOR_nothing))
10625 break;
10626
9771b263 10627 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10628 (*multi_step_cvt)++;
10629
10630 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10631 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10632 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10633 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10634
10635 prev_mode = intermediate_mode;
3ae0661a 10636 prev_type = intermediate_type;
4a00c761 10637 optab1 = interm_optab;
ebfd146a
IR
10638 }
10639
9771b263 10640 interm_types->release ();
4a00c761 10641 return false;
ebfd146a 10642}
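
/* Editor's worked example (illustrative): narrowing V4SI to V16QI uses
   VEC_PACK_TRUNC_EXPR twice, with V8HI as the intermediate type pushed
   onto INTERM_TYPES and *MULTI_STEP_CVT set to 1.  */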
7cfb4d93
RS
10643
10644/* Generate and return a statement that sets vector mask MASK such that
10645 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10646
10647gcall *
10648vect_gen_while (tree mask, tree start_index, tree end_index)
10649{
10650 tree cmp_type = TREE_TYPE (start_index);
10651 tree mask_type = TREE_TYPE (mask);
10652 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10653 cmp_type, mask_type,
10654 OPTIMIZE_FOR_SPEED));
10655 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10656 start_index, end_index,
10657 build_zero_cst (mask_type));
10658 gimple_call_set_lhs (call, mask);
10659 return call;
10660}
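
/* Editor's usage sketch (hypothetical values): to predicate the tail
   of an 11-iteration loop with 4-lane vectors, the final iteration can
   use

     mask = .WHILE_ULT (8, 11, { 0, ... });

   which makes lanes 0..2 true (8, 9 and 10 are below 11) and lane 3
   false, so only the three remaining elements are active.  */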
535e7c11
RS
10661
10662/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10663 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10664
10665tree
10666vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10667 tree end_index)
10668{
10669 tree tmp = make_ssa_name (mask_type);
10670 gcall *call = vect_gen_while (tmp, start_index, end_index);
10671 gimple_seq_add_stmt (seq, call);
10672 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10673}
1f3cb663
RS
10674
10675/* Try to compute the vector types required to vectorize STMT_INFO,
10676 returning true on success and false if vectorization isn't possible.
10677
10678 On success:
10679
10680 - Set *STMT_VECTYPE_OUT to:
10681 - NULL_TREE if the statement doesn't need to be vectorized;
10682 - boolean_type_node if the statement is a boolean operation whose
10683 vector type can only be determined once all the other vector types
10684 are known; and
10685 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10686
10687 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10688 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10689 statement does not help to determine the overall number of units. */
10690
10691bool
10692vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10693 tree *stmt_vectype_out,
10694 tree *nunits_vectype_out)
10695{
10696 gimple *stmt = stmt_info->stmt;
10697
10698 *stmt_vectype_out = NULL_TREE;
10699 *nunits_vectype_out = NULL_TREE;
10700
10701 if (gimple_get_lhs (stmt) == NULL_TREE
10702 /* MASK_STORE has no lhs, but is ok. */
10703 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10704 {
10705 if (is_a <gcall *> (stmt))
10706 {
10707 /* Ignore calls with no lhs. These must be calls to
10708 #pragma omp simd functions, and what vectorization factor
10709 it really needs can't be determined until
10710 vectorizable_simd_clone_call. */
10711 if (dump_enabled_p ())
10712 dump_printf_loc (MSG_NOTE, vect_location,
10713 "defer to SIMD clone analysis.\n");
10714 return true;
10715 }
10716
10717 if (dump_enabled_p ())
10718 {
10719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10720 "not vectorized: irregular stmt.");
10721 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10722 }
10723 return false;
10724 }
10725
10726 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10727 {
10728 if (dump_enabled_p ())
10729 {
10730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10731 "not vectorized: vector stmt in loop:");
10732 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10733 }
10734 return false;
10735 }
10736
10737 tree vectype;
10738 tree scalar_type = NULL_TREE;
10739 if (STMT_VINFO_VECTYPE (stmt_info))
10740 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10741 else
10742 {
10743 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10744 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10745 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10746 else
10747 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10748
10749 /* Pure bool ops don't participate in number-of-units computation.
10750 For comparisons use the types being compared. */
10751 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10752 && is_gimple_assign (stmt)
10753 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10754 {
10755 *stmt_vectype_out = boolean_type_node;
10756
10757 tree rhs1 = gimple_assign_rhs1 (stmt);
10758 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10759 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10760 scalar_type = TREE_TYPE (rhs1);
10761 else
10762 {
10763 if (dump_enabled_p ())
10764 dump_printf_loc (MSG_NOTE, vect_location,
10765 "pure bool operation.\n");
10766 return true;
10767 }
10768 }
10769
10770 if (dump_enabled_p ())
10771 {
10772 dump_printf_loc (MSG_NOTE, vect_location,
10773 "get vectype for scalar type: ");
10774 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10775 dump_printf (MSG_NOTE, "\n");
10776 }
10777 vectype = get_vectype_for_scalar_type (scalar_type);
10778 if (!vectype)
10779 {
10780 if (dump_enabled_p ())
10781 {
10782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10783 "not vectorized: unsupported data-type ");
10784 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10785 scalar_type);
10786 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10787 }
10788 return false;
10789 }
10790
10791 if (!*stmt_vectype_out)
10792 *stmt_vectype_out = vectype;
10793
10794 if (dump_enabled_p ())
10795 {
10796 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10797 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10798 dump_printf (MSG_NOTE, "\n");
10799 }
10800 }
10801
10802 /* Don't try to compute scalar types if the stmt produces a boolean
10803 vector; use the existing vector type instead. */
10804 tree nunits_vectype;
10805 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10806 nunits_vectype = vectype;
10807 else
10808 {
10809 /* The number of units is set according to the smallest scalar
10810 type (or the largest vector size, but we only support one
10811 vector size per vectorization). */
10812 if (*stmt_vectype_out != boolean_type_node)
10813 {
10814 HOST_WIDE_INT dummy;
10815 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10816 }
10817 if (dump_enabled_p ())
10818 {
10819 dump_printf_loc (MSG_NOTE, vect_location,
10820 "get vectype for scalar type: ");
10821 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10822 dump_printf (MSG_NOTE, "\n");
10823 }
10824 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10825 }
10826 if (!nunits_vectype)
10827 {
10828 if (dump_enabled_p ())
10829 {
10830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10831 "not vectorized: unsupported data-type ");
10832 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10833 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10834 }
10835 return false;
10836 }
10837
10838 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10839 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10840 {
10841 if (dump_enabled_p ())
10842 {
10843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10844 "not vectorized: different sized vector "
10845 "types in statement, ");
10846 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10847 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10848 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10849 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10850 }
10851 return false;
10852 }
10853
10854 if (dump_enabled_p ())
10855 {
10856 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10857 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10858 dump_printf (MSG_NOTE, "\n");
10859
10860 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10861 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10862 dump_printf (MSG_NOTE, "\n");
10863 }
10864
10865 *nunits_vectype_out = nunits_vectype;
10866 return true;
10867}
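
/* Editor's example (illustrative): for "c_3 = a_1 + b_2" on doubles,
   both *STMT_VECTYPE_OUT and *NUNITS_VECTYPE_OUT are the double vector
   type.  For a pure boolean statement such as "d_6 = d_4 & d_5" the
   function instead sets *STMT_VECTYPE_OUT to boolean_type_node and
   leaves *NUNITS_VECTYPE_OUT as NULL_TREE, deferring the choice of
   vector type until the other vector types are known.  */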
10868
10869/* Try to determine the correct vector type for STMT_INFO, which is a
10870 statement that produces a scalar boolean result. Return the vector
10871 type on success, otherwise return NULL_TREE. */
10872
10873tree
10874vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10875{
10876 gimple *stmt = stmt_info->stmt;
10877 tree mask_type = NULL;
10878 tree vectype, scalar_type;
10879
10880 if (is_gimple_assign (stmt)
10881 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10882 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10883 {
10884 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10885 mask_type = get_mask_type_for_scalar_type (scalar_type);
10886
10887 if (!mask_type)
10888 {
10889 if (dump_enabled_p ())
10890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10891 "not vectorized: unsupported mask\n");
10892 return NULL_TREE;
10893 }
10894 }
10895 else
10896 {
10897 tree rhs;
10898 ssa_op_iter iter;
10899 gimple *def_stmt;
10900 enum vect_def_type dt;
10901
10902 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10903 {
10904 if (!vect_is_simple_use (rhs, stmt_info->vinfo,
10905 &def_stmt, &dt, &vectype))
10906 {
10907 if (dump_enabled_p ())
10908 {
10909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10910 "not vectorized: can't compute mask type "
10911 "for statement, ");
10912 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10913 0);
10914 }
10915 return NULL_TREE;
10916 }
10917
10918 /* No vectype probably means external definition.
10919 Allow it in case there is another operand which
10920 allows us to determine the mask type. */
10921 if (!vectype)
10922 continue;
10923
10924 if (!mask_type)
10925 mask_type = vectype;
10926 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10927 TYPE_VECTOR_SUBPARTS (vectype)))
10928 {
10929 if (dump_enabled_p ())
10930 {
10931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10932 "not vectorized: different sized masks "
10933 "types in statement, ");
10934 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10935 mask_type);
10936 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10937 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10938 vectype);
10939 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10940 }
10941 return NULL_TREE;
10942 }
10943 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10944 != VECTOR_BOOLEAN_TYPE_P (vectype))
10945 {
10946 if (dump_enabled_p ())
10947 {
10948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10949 "not vectorized: mixed mask and "
10950 "nonmask vector types in statement, ");
10951 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10952 mask_type);
10953 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10954 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10955 vectype);
10956 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10957 }
10958 return NULL_TREE;
10959 }
10960 }
10961
10962 /* We may compare a boolean value loaded as a vector of integers.
10963 Fix mask_type in such a case. */
10964 if (mask_type
10965 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10966 && gimple_code (stmt) == GIMPLE_ASSIGN
10967 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10968 mask_type = build_same_sized_truth_vector_type (mask_type);
10969 }
10970
10971 /* No mask_type should mean a loop-invariant predicate.
10972 This is probably a subject for optimization in if-conversion. */
10973 if (!mask_type && dump_enabled_p ())
10974 {
10975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10976 "not vectorized: can't compute mask type "
10977 "for statement, ");
10978 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10979 }
10980 return mask_type;
10981}
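
/* Editor's example (illustrative): for "b_3 = x_1 < y_2" comparing two
   ints, the mask type comes from get_mask_type_for_scalar_type on int;
   for "b_5 = b_3 & b_4" it is instead derived from the vector types of
   the operands, which must agree in lane count and in mask-ness, as
   checked above.  */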