[thirdparty/gcc.git] / gcc / tree-vect-stmts.c
ebfd146a 1/* Statement Analysis and Transformation for Vectorization
85ec4feb 2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
957060b5 35#include "dumpfile.h"
c7131fb2 36#include "alias.h"
40e23961 37#include "fold-const.h"
d8a2d370 38#include "stor-layout.h"
2fb9a547 39#include "tree-eh.h"
45b0be94 40#include "gimplify.h"
5be5c238 41#include "gimple-iterator.h"
18f429e2 42#include "gimplify-me.h"
442b4905 43#include "tree-cfg.h"
e28030cf 44#include "tree-ssa-loop-manip.h"
ebfd146a 45#include "cfgloop.h"
0136f8f0
AH
46#include "tree-ssa-loop.h"
47#include "tree-scalar-evolution.h"
ebfd146a 48#include "tree-vectorizer.h"
9b2b7279 49#include "builtins.h"
70439f0d 50#include "internal-fn.h"
5ebaa477 51#include "tree-vector-builder.h"
f151c9e1 52#include "vec-perm-indices.h"
7cfb4d93
RS
53#include "tree-ssa-loop-niter.h"
54#include "gimple-fold.h"
ebfd146a 55
7ee2468b
SB
56/* For lang_hooks.types.type_for_mode. */
57#include "langhooks.h"
ebfd146a 58
c3e7ee41
BS
59/* Return the vectorized type for the given statement. */
60
61tree
62stmt_vectype (struct _stmt_vec_info *stmt_info)
63{
64 return STMT_VINFO_VECTYPE (stmt_info);
65}
66
67/* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69bool
70stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71{
355fe088 72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
c3e7ee41
BS
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
76
77 if (!loop_vinfo)
78 return false;
79
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
81
82 return (bb->loop_father == loop->inner);
83}
84
85/* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
88
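/* For example: recording COUNT == 2 unaligned_load statements for a
   gather/scatter STMT_INFO remaps KIND to vector_gather_load before the
   entry is pushed, and the preliminary estimate returned is
   2 * builtin_vectorization_cost (vector_gather_load, vectype, misalign).  */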
89unsigned
92345349 90record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
c3e7ee41 91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92345349 92 int misalign, enum vect_cost_model_location where)
c3e7ee41 93{
cc9fe6bb
JH
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
68435eb2
RB
100
101 stmt_info_for_cost si = { count, kind, where,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
c3e7ee41
BS
109}
110
272c6793
RS
111/* Return a variable of type ELEM_TYPE[NELEMS]. */
112
113static tree
114create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
115{
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
118}
119
120/* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
124
125static tree
355fe088 126read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
272c6793
RS
127 tree array, unsigned HOST_WIDE_INT n)
128{
129 tree vect_type, vect, vect_name, array_ref;
355fe088 130 gimple *new_stmt;
272c6793
RS
131
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
138
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
143
144 return vect_name;
145}
146
147/* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
150
151static void
355fe088 152write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
272c6793
RS
153 tree array, unsigned HOST_WIDE_INT n)
154{
155 tree array_ref;
355fe088 156 gimple *new_stmt;
272c6793
RS
157
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
161
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
164}
165
166/* PTR is a pointer to an array of type TYPE. Return a representation
 167 of *PTR, using ALIAS_PTR_TYPE as the alias pointer type of the
 168 memory reference. */
169
170static tree
44fc7854 171create_array_ref (tree type, tree ptr, tree alias_ptr_type)
272c6793 172{
44fc7854 173 tree mem_ref;
272c6793 174
272c6793
RS
175 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176 /* Arrays have the same alignment as their type. */
644ffefd 177 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
272c6793
RS
178 return mem_ref;
179}
180
3ba4ff41
RS
181/* Add a clobber of variable VAR to the vectorization of STMT.
182 Emit the clobber before *GSI. */
183
184static void
185vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
186{
187 tree clobber = build_clobber (TREE_TYPE (var));
188 gimple *new_stmt = gimple_build_assign (var, clobber);
189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
190}
191
ebfd146a
IR
192/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193
194/* Function vect_mark_relevant.
195
196 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
197
198static void
355fe088 199vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
97ecdb46 200 enum vect_relevant relevant, bool live_p)
ebfd146a
IR
201{
202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
203 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
355fe088 205 gimple *pattern_stmt;
ebfd146a 206
73fbfcad 207 if (dump_enabled_p ())
66c16fd9
RB
208 {
209 dump_printf_loc (MSG_NOTE, vect_location,
210 "mark relevant %d, live %d: ", relevant, live_p);
211 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
212 }
ebfd146a 213
83197f37
IR
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
ebfd146a
IR
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
219 {
97ecdb46
JJ
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
83197f37 224
97ecdb46
JJ
225 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
226
227 if (dump_enabled_p ())
228 dump_printf_loc (MSG_NOTE, vect_location,
229 "last stmt in pattern. don't mark"
230 " relevant/live.\n");
231 stmt_info = vinfo_for_stmt (pattern_stmt);
232 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
233 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
234 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
235 stmt = pattern_stmt;
ebfd146a
IR
236 }
237
238 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
239 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
240 STMT_VINFO_RELEVANT (stmt_info) = relevant;
241
242 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
243 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
244 {
73fbfcad 245 if (dump_enabled_p ())
78c60e3d 246 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 247 "already marked relevant/live.\n");
ebfd146a
IR
248 return;
249 }
250
9771b263 251 worklist->safe_push (stmt);
ebfd146a
IR
252}
253
254
b28ead45
AH
255/* Function is_simple_and_all_uses_invariant
256
257 Return true if STMT is simple and all uses of it are invariant. */
258
259bool
260is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
261{
262 tree op;
b28ead45
AH
263 ssa_op_iter iter;
264
265 if (!is_gimple_assign (stmt))
266 return false;
267
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
269 {
270 enum vect_def_type dt = vect_uninitialized_def;
271
894dd753 272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
b28ead45
AH
273 {
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
278 }
279
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
282 }
283 return true;
284}
285
ebfd146a
IR
286/* Function vect_stmt_relevant_p.
287
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
290
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (other than the exit condition).
295
296 CHECKME: what other side effects would the vectorizer allow? */
297
298static bool
355fe088 299vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
ebfd146a
IR
300 enum vect_relevant *relevant, bool *live_p)
301{
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
307
8644a673 308 *relevant = vect_unused_in_scope;
ebfd146a
IR
309 *live_p = false;
310
311 /* cond stmt other than loop exit cond. */
b8698a0f
L
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
8644a673 315 *relevant = vect_used_in_scope;
ebfd146a
IR
316
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
319 if (gimple_vdef (stmt)
320 && !gimple_clobber_p (stmt))
ebfd146a 321 {
73fbfcad 322 if (dump_enabled_p ())
78c60e3d 323 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 324 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 325 *relevant = vect_used_in_scope;
ebfd146a
IR
326 }
327
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 {
331 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 {
333 basic_block bb = gimple_bb (USE_STMT (use_p));
334 if (!flow_bb_inside_loop_p (loop, bb))
335 {
73fbfcad 336 if (dump_enabled_p ())
78c60e3d 337 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 338 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 339
3157b0c2
AO
340 if (is_gimple_debug (USE_STMT (use_p)))
341 continue;
342
ebfd146a
IR
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
346 gcc_assert (bb == single_exit (loop)->dest);
347
348 *live_p = true;
349 }
350 }
351 }
352
3a2edf4c
AH
353 if (*live_p && *relevant == vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
b28ead45
AH
355 {
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE, vect_location,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant = vect_used_only_live;
360 }
361
ebfd146a
IR
362 return (*live_p || *relevant);
363}
364
365
b8698a0f 366/* Function exist_non_indexing_operands_for_use_p
ebfd146a 367
ff802fa1 368 USE is one of the uses attached to STMT. Check if USE is
ebfd146a
IR
369 used in STMT for anything other than indexing an array. */
370
371static bool
355fe088 372exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
ebfd146a
IR
373{
374 tree operand;
375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
59a05b0c 376
ff802fa1 377 /* USE corresponds to some operand in STMT. If there is no data
ebfd146a
IR
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info))
381 return true;
59a05b0c 382
ebfd146a
IR
383 /* STMT has a data_ref. FORNOW this means that it's of one of
384 the following forms:
385 -1- ARRAY_REF = var
386 -2- var = ARRAY_REF
387 (This should have been verified in analyze_data_refs).
388
389 'var' in the second case corresponds to a def, not a use,
b8698a0f 390 so USE cannot correspond to any operands that are not used
ebfd146a
IR
391 for array indexing.
392
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
ebfd146a
IR
395
396 if (!gimple_assign_copy_p (stmt))
5ce9450f
JJ
397 {
398 if (is_gimple_call (stmt)
399 && gimple_call_internal_p (stmt))
bfaa08b7
RS
400 {
401 internal_fn ifn = gimple_call_internal_fn (stmt);
402 int mask_index = internal_fn_mask_index (ifn);
403 if (mask_index >= 0
404 && use == gimple_call_arg (stmt, mask_index))
405 return true;
f307441a
RS
406 int stored_value_index = internal_fn_stored_value_index (ifn);
407 if (stored_value_index >= 0
408 && use == gimple_call_arg (stmt, stored_value_index))
409 return true;
bfaa08b7
RS
410 if (internal_gather_scatter_fn_p (ifn)
411 && use == gimple_call_arg (stmt, 1))
412 return true;
bfaa08b7 413 }
5ce9450f
JJ
414 return false;
415 }
416
59a05b0c
EB
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
ebfd146a 419 operand = gimple_assign_rhs1 (stmt);
ebfd146a
IR
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
422
423 if (operand == use)
424 return true;
425
426 return false;
427}
428
429
b8698a0f 430/*
ebfd146a
IR
431 Function process_use.
432
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
b28ead45 435 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
ff802fa1 436 that defined USE. This is done by calling mark_relevant and passing it
ebfd146a 437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
aec7ae7d
JJ
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 be performed.
ebfd146a
IR
440
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447 - case 1: If USE is used only for address computations (e.g. array indexing),
b8698a0f 448 which does not need to be directly vectorized, then the liveness/relevance
ebfd146a 449 of the respective DEF_STMT is left unchanged.
b8698a0f
L
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT because it has already been processed.
ebfd146a
IR
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
454
455 Return true if everything is as expected. Return false otherwise. */
456
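/* An illustrative sketch of case 1: for a statement such as "x = a[i]",
   the use of "i" only feeds the address computation of the array
   reference, so exist_non_indexing_operands_for_use_p returns false for
   it and the def stmt of "i" is not marked relevant here.  */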
457static bool
b28ead45 458process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
355fe088 459 enum vect_relevant relevant, vec<gimple *> *worklist,
aec7ae7d 460 bool force)
ebfd146a
IR
461{
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
355fe088 466 gimple *def_stmt;
ebfd146a
IR
467 enum vect_def_type dt;
468
b8698a0f 469 /* case 1: we are only interested in uses that need to be vectorized. Uses
ebfd146a 470 that are used for address computation are not considered relevant. */
aec7ae7d 471 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
ebfd146a
IR
472 return true;
473
894dd753 474 if (!vect_is_simple_use (use, loop_vinfo, &dt, &def_stmt))
b8698a0f 475 {
73fbfcad 476 if (dump_enabled_p ())
78c60e3d 477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 478 "not vectorized: unsupported use in stmt.\n");
ebfd146a
IR
479 return false;
480 }
481
482 if (!def_stmt || gimple_nop_p (def_stmt))
483 return true;
484
485 def_bb = gimple_bb (def_stmt);
486 if (!flow_bb_inside_loop_p (loop, def_bb))
487 {
73fbfcad 488 if (dump_enabled_p ())
e645e942 489 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
ebfd146a
IR
490 return true;
491 }
492
b8698a0f
L
493 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
494 DEF_STMT must have already been processed, because this should be the
495 only way that STMT, which is a reduction-phi, was put in the worklist,
496 as there should be no other uses for DEF_STMT in the loop. So we just
ebfd146a
IR
497 check that everything is as expected, and we are done. */
498 dstmt_vinfo = vinfo_for_stmt (def_stmt);
499 bb = gimple_bb (stmt);
500 if (gimple_code (stmt) == GIMPLE_PHI
501 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
502 && gimple_code (def_stmt) != GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
504 && bb->loop_father == def_bb->loop_father)
505 {
73fbfcad 506 if (dump_enabled_p ())
78c60e3d 507 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 508 "reduc-stmt defining reduc-phi in the same nest.\n");
ebfd146a 509 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
b8698a0f 510 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
8644a673 511 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
ebfd146a
IR
512 return true;
513 }
514
515 /* case 3a: outer-loop stmt defining an inner-loop stmt:
516 outer-loop-header-bb:
517 d = def_stmt
518 inner-loop:
519 stmt # use (d)
520 outer-loop-tail-bb:
521 ... */
522 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
523 {
73fbfcad 524 if (dump_enabled_p ())
78c60e3d 525 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 526 "outer-loop def-stmt defining inner-loop stmt.\n");
7c5222ff 527
ebfd146a
IR
528 switch (relevant)
529 {
8644a673 530 case vect_unused_in_scope:
7c5222ff
IR
531 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
532 vect_used_in_scope : vect_unused_in_scope;
ebfd146a 533 break;
7c5222ff 534
ebfd146a 535 case vect_used_in_outer_by_reduction:
7c5222ff 536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
ebfd146a
IR
537 relevant = vect_used_by_reduction;
538 break;
7c5222ff 539
ebfd146a 540 case vect_used_in_outer:
7c5222ff 541 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
8644a673 542 relevant = vect_used_in_scope;
ebfd146a 543 break;
7c5222ff 544
8644a673 545 case vect_used_in_scope:
ebfd146a
IR
546 break;
547
548 default:
549 gcc_unreachable ();
b8698a0f 550 }
ebfd146a
IR
551 }
552
553 /* case 3b: inner-loop stmt defining an outer-loop stmt:
554 outer-loop-header-bb:
555 ...
556 inner-loop:
557 d = def_stmt
06066f92 558 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
ebfd146a
IR
559 stmt # use (d) */
560 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
561 {
73fbfcad 562 if (dump_enabled_p ())
78c60e3d 563 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 564 "inner-loop def-stmt defining outer-loop stmt.\n");
7c5222ff 565
ebfd146a
IR
566 switch (relevant)
567 {
8644a673 568 case vect_unused_in_scope:
b8698a0f 569 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
06066f92 570 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
a70d6342 571 vect_used_in_outer_by_reduction : vect_unused_in_scope;
ebfd146a
IR
572 break;
573
ebfd146a 574 case vect_used_by_reduction:
b28ead45 575 case vect_used_only_live:
ebfd146a
IR
576 relevant = vect_used_in_outer_by_reduction;
577 break;
578
8644a673 579 case vect_used_in_scope:
ebfd146a
IR
580 relevant = vect_used_in_outer;
581 break;
582
583 default:
584 gcc_unreachable ();
585 }
586 }
643a9684
RB
587 /* We are also not interested in uses on loop PHI backedges that are
588 inductions. Otherwise we'll needlessly vectorize the IV increment
e294f495
RB
589 and cause hybrid SLP for SLP inductions. Unless the PHI is live
590 of course. */
643a9684
RB
591 else if (gimple_code (stmt) == GIMPLE_PHI
592 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
e294f495 593 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
643a9684
RB
594 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
595 == use))
596 {
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE, vect_location,
599 "induction value on backedge.\n");
600 return true;
601 }
602
ebfd146a 603
b28ead45 604 vect_mark_relevant (worklist, def_stmt, relevant, false);
ebfd146a
IR
605 return true;
606}
607
608
609/* Function vect_mark_stmts_to_be_vectorized.
610
611 Not all stmts in the loop need to be vectorized. For example:
612
613 for i...
614 for j...
615 1. T0 = i + j
616 2. T1 = a[T0]
617
618 3. j = j + 1
619
620 Stmts 1 and 3 do not need to be vectorized, because loop control and
621 addressing of vectorized data-refs are handled differently.
622
623 This pass detects such stmts. */
624
625bool
626vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
627{
ebfd146a
IR
628 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
629 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
630 unsigned int nbbs = loop->num_nodes;
631 gimple_stmt_iterator si;
355fe088 632 gimple *stmt;
ebfd146a
IR
633 unsigned int i;
634 stmt_vec_info stmt_vinfo;
635 basic_block bb;
355fe088 636 gimple *phi;
ebfd146a 637 bool live_p;
b28ead45 638 enum vect_relevant relevant;
ebfd146a 639
adac3a68 640 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
ebfd146a 641
355fe088 642 auto_vec<gimple *, 64> worklist;
ebfd146a
IR
643
644 /* 1. Init worklist. */
645 for (i = 0; i < nbbs; i++)
646 {
647 bb = bbs[i];
648 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
b8698a0f 649 {
ebfd146a 650 phi = gsi_stmt (si);
73fbfcad 651 if (dump_enabled_p ())
ebfd146a 652 {
78c60e3d
SS
653 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
654 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
655 }
656
657 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
97ecdb46 658 vect_mark_relevant (&worklist, phi, relevant, live_p);
ebfd146a
IR
659 }
660 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
661 {
662 stmt = gsi_stmt (si);
73fbfcad 663 if (dump_enabled_p ())
ebfd146a 664 {
78c60e3d
SS
665 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 667 }
ebfd146a
IR
668
669 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
97ecdb46 670 vect_mark_relevant (&worklist, stmt, relevant, live_p);
ebfd146a
IR
671 }
672 }
673
674 /* 2. Process_worklist */
9771b263 675 while (worklist.length () > 0)
ebfd146a
IR
676 {
677 use_operand_p use_p;
678 ssa_op_iter iter;
679
9771b263 680 stmt = worklist.pop ();
73fbfcad 681 if (dump_enabled_p ())
ebfd146a 682 {
78c60e3d
SS
683 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
685 }
686
b8698a0f 687 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
b28ead45
AH
688 (DEF_STMT) as relevant/irrelevant according to the relevance property
689 of STMT. */
ebfd146a
IR
690 stmt_vinfo = vinfo_for_stmt (stmt);
691 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
ebfd146a 692
b28ead45
AH
693 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
694 propagated as is to the DEF_STMTs of its USEs.
ebfd146a
IR
695
696 One exception is when STMT has been identified as defining a reduction
b28ead45 697 variable; in this case we set the relevance to vect_used_by_reduction.
ebfd146a 698 This is because we distinguish between two kinds of relevant stmts -
b8698a0f 699 those that are used by a reduction computation, and those that are
ff802fa1 700 (also) used by a regular computation. This allows us later on to
b8698a0f 701 identify stmts that are used solely by a reduction, and therefore the
7c5222ff 702 order of the results that they produce does not have to be kept. */
ebfd146a 703
b28ead45 704 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
ebfd146a 705 {
06066f92 706 case vect_reduction_def:
b28ead45
AH
707 gcc_assert (relevant != vect_unused_in_scope);
708 if (relevant != vect_unused_in_scope
709 && relevant != vect_used_in_scope
710 && relevant != vect_used_by_reduction
711 && relevant != vect_used_only_live)
06066f92 712 {
b28ead45
AH
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
715 "unsupported use of reduction.\n");
716 return false;
06066f92 717 }
06066f92 718 break;
b8698a0f 719
06066f92 720 case vect_nested_cycle:
b28ead45
AH
721 if (relevant != vect_unused_in_scope
722 && relevant != vect_used_in_outer_by_reduction
723 && relevant != vect_used_in_outer)
06066f92 724 {
73fbfcad 725 if (dump_enabled_p ())
78c60e3d 726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 727 "unsupported use of nested cycle.\n");
7c5222ff 728
06066f92
IR
729 return false;
730 }
b8698a0f
L
731 break;
732
06066f92 733 case vect_double_reduction_def:
b28ead45
AH
734 if (relevant != vect_unused_in_scope
735 && relevant != vect_used_by_reduction
736 && relevant != vect_used_only_live)
06066f92 737 {
73fbfcad 738 if (dump_enabled_p ())
78c60e3d 739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 740 "unsupported use of double reduction.\n");
7c5222ff 741
7c5222ff 742 return false;
06066f92 743 }
b8698a0f 744 break;
7c5222ff 745
06066f92
IR
746 default:
747 break;
7c5222ff 748 }
b8698a0f 749
aec7ae7d 750 if (is_pattern_stmt_p (stmt_vinfo))
9d5e7640
IR
751 {
752 /* Pattern statements are not inserted into the code, so
753 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
754 have to scan the RHS or function arguments instead. */
755 if (is_gimple_assign (stmt))
756 {
69d2aade
JJ
757 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
758 tree op = gimple_assign_rhs1 (stmt);
759
760 i = 1;
761 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
762 {
763 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
b28ead45 764 relevant, &worklist, false)
69d2aade 765 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
b28ead45 766 relevant, &worklist, false))
566d377a 767 return false;
69d2aade
JJ
768 i = 2;
769 }
770 for (; i < gimple_num_ops (stmt); i++)
9d5e7640 771 {
69d2aade 772 op = gimple_op (stmt, i);
afbe6325 773 if (TREE_CODE (op) == SSA_NAME
b28ead45 774 && !process_use (stmt, op, loop_vinfo, relevant,
afbe6325 775 &worklist, false))
07687835 776 return false;
9d5e7640
IR
777 }
778 }
779 else if (is_gimple_call (stmt))
780 {
781 for (i = 0; i < gimple_call_num_args (stmt); i++)
782 {
783 tree arg = gimple_call_arg (stmt, i);
b28ead45 784 if (!process_use (stmt, arg, loop_vinfo, relevant,
aec7ae7d 785 &worklist, false))
07687835 786 return false;
9d5e7640
IR
787 }
788 }
789 }
790 else
791 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
792 {
793 tree op = USE_FROM_PTR (use_p);
b28ead45 794 if (!process_use (stmt, op, loop_vinfo, relevant,
aec7ae7d 795 &worklist, false))
07687835 796 return false;
9d5e7640 797 }
aec7ae7d 798
3bab6342 799 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
aec7ae7d 800 {
134c85ca
RS
801 gather_scatter_info gs_info;
802 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
803 gcc_unreachable ();
804 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
805 &worklist, true))
566d377a 806 return false;
aec7ae7d 807 }
ebfd146a
IR
808 } /* while worklist */
809
ebfd146a
IR
810 return true;
811}
812
68435eb2
RB
813/* Compute the prologue cost for invariant or constant operands. */
814
815static unsigned
816vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
817 unsigned opno, enum vect_def_type dt,
818 stmt_vector_for_cost *cost_vec)
819{
820 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
821 tree op = gimple_op (stmt, opno);
822 unsigned prologue_cost = 0;
823
824 /* Without looking at the actual initializer a vector of
825 constants can be implemented as load from the constant pool.
826 When all elements are the same we can use a splat. */
827 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
828 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
829 unsigned num_vects_to_check;
830 unsigned HOST_WIDE_INT const_nunits;
831 unsigned nelt_limit;
832 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
833 && ! multiple_p (const_nunits, group_size))
834 {
835 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
836 nelt_limit = const_nunits;
837 }
838 else
839 {
840 /* If either the vector has variable length or the vectors
841 are composed of repeated whole groups we only need to
842 cost construction once. All vectors will be the same. */
843 num_vects_to_check = 1;
844 nelt_limit = group_size;
845 }
846 tree elt = NULL_TREE;
847 unsigned nelt = 0;
848 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
849 {
850 unsigned si = j % group_size;
851 if (nelt == 0)
852 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
853 /* ??? We're just tracking whether all operands of a single
854 vector initializer are the same, ideally we'd check if
855 we emitted the same one already. */
856 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
857 opno))
858 elt = NULL_TREE;
859 nelt++;
860 if (nelt == nelt_limit)
861 {
862 /* ??? We need to pass down stmt_info for a vector type
863 even if it points to the wrong stmt. */
864 prologue_cost += record_stmt_cost
865 (cost_vec, 1,
866 dt == vect_external_def
867 ? (elt ? scalar_to_vec : vec_construct)
868 : vector_load,
869 stmt_info, 0, vect_prologue);
870 nelt = 0;
871 }
872 }
873
874 return prologue_cost;
875}
ebfd146a 876
b8698a0f 877/* Function vect_model_simple_cost.
ebfd146a 878
b8698a0f 879 Models cost for simple operations, i.e. those that only emit ncopies of a
ebfd146a
IR
880 single op. Right now, this does not account for multiple insns that could
881 be generated for the single vector op. We will handle that shortly. */
882
68435eb2 883static void
b8698a0f 884vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
92345349 885 enum vect_def_type *dt,
4fc5ebf1 886 int ndts,
68435eb2
RB
887 slp_tree node,
888 stmt_vector_for_cost *cost_vec)
ebfd146a 889{
92345349 890 int inside_cost = 0, prologue_cost = 0;
ebfd146a 891
68435eb2 892 gcc_assert (cost_vec != NULL);
ebfd146a 893
68435eb2
RB
894 /* ??? Somehow we need to fix this at the callers. */
895 if (node)
896 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
897
898 if (node)
899 {
900 /* Scan operands and account for prologue cost of constants/externals.
901 ??? This over-estimates cost for multiple uses and should be
902 re-engineered. */
903 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
904 tree lhs = gimple_get_lhs (stmt);
905 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
906 {
907 tree op = gimple_op (stmt, i);
68435eb2
RB
908 enum vect_def_type dt;
909 if (!op || op == lhs)
910 continue;
894dd753 911 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
68435eb2
RB
912 && (dt == vect_constant_def || dt == vect_external_def))
913 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
914 i, dt, cost_vec);
915 }
916 }
917 else
918 /* Cost the "broadcast" of a scalar operand in to a vector operand.
919 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
920 cost model. */
921 for (int i = 0; i < ndts; i++)
922 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
923 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
924 stmt_info, 0, vect_prologue);
925
926 /* Adjust for two-operator SLP nodes. */
927 if (node && SLP_TREE_TWO_OPERATORS (node))
928 {
929 ncopies *= 2;
930 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
931 stmt_info, 0, vect_body);
932 }
c3e7ee41
BS
933
934 /* Pass the inside-of-loop statements to the target-specific cost model. */
68435eb2
RB
935 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
936 stmt_info, 0, vect_body);
c3e7ee41 937
73fbfcad 938 if (dump_enabled_p ())
78c60e3d
SS
939 dump_printf_loc (MSG_NOTE, vect_location,
940 "vect_model_simple_cost: inside_cost = %d, "
e645e942 941 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
942}
943
944
8bd37302
BS
945/* Model cost for type demotion and promotion operations. PWR is normally
946 zero for single-step promotions and demotions. It will be one if
947 two-step promotion/demotion is required, and so on. Each additional
948 step doubles the number of instructions required. */
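/* Rough worked example (assuming vect_pow2 (x) computes 1 << x): a
   two-step promotion (PWR == 1) is charged
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote stmts by the
   loop below, while a two-step demotion is charged
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2.  */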
949
950static void
951vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
68435eb2
RB
952 enum vect_def_type *dt, int pwr,
953 stmt_vector_for_cost *cost_vec)
8bd37302
BS
954{
955 int i, tmp;
92345349 956 int inside_cost = 0, prologue_cost = 0;
c3e7ee41 957
8bd37302
BS
958 for (i = 0; i < pwr + 1; i++)
959 {
960 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
961 (i + 1) : i;
68435eb2
RB
962 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
963 vec_promote_demote, stmt_info, 0,
964 vect_body);
8bd37302
BS
965 }
966
967 /* FORNOW: Assuming maximum 2 args per stmts. */
968 for (i = 0; i < 2; i++)
92345349 969 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
68435eb2
RB
970 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
971 stmt_info, 0, vect_prologue);
8bd37302 972
73fbfcad 973 if (dump_enabled_p ())
78c60e3d
SS
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
977}
978
ebfd146a
IR
979/* Function vect_model_store_cost
980
0d0293ac
MM
981 Models cost for stores. In the case of grouped accesses, one access
982 has the overhead of the grouped access attributed to it. */
ebfd146a 983
68435eb2 984static void
b8698a0f 985vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
68435eb2 986 enum vect_def_type dt,
2de001ee 987 vect_memory_access_type memory_access_type,
9ce4345a 988 vec_load_store_type vls_type, slp_tree slp_node,
68435eb2 989 stmt_vector_for_cost *cost_vec)
ebfd146a 990{
92345349 991 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f
RS
992 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
993 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 994
68435eb2
RB
995 /* ??? Somehow we need to fix this at the callers. */
996 if (slp_node)
997 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
998
9ce4345a 999 if (vls_type == VLS_STORE_INVARIANT)
68435eb2
RB
1000 {
1001 if (slp_node)
1002 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1003 1, dt, cost_vec);
1004 else
1005 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1006 stmt_info, 0, vect_prologue);
1007 }
ebfd146a 1008
892a981f
RS
1009 /* Grouped stores update all elements in the group at once,
1010 so we want the DR for the first statement. */
1011 if (!slp_node && grouped_access_p)
57c454d2 1012 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 1013
892a981f
RS
1014 /* True if we should include any once-per-group costs as well as
1015 the cost of the statement itself. For SLP we only get called
1016 once per group anyhow. */
1017 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1018
272c6793 1019 /* We assume that the cost of a single store-lanes instruction is
2c53b149 1020 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
272c6793 1021 access is instead being provided by a permute-and-store operation,
2de001ee
RS
1022 include the cost of the permutes. */
1023 if (first_stmt_p
1024 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1025 {
e1377713
ES
1026 /* Uses a high and low interleave or shuffle operations for each
1027 needed permute. */
2c53b149 1028 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
e1377713 1029 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
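/* Worked example (illustration only): for GROUP_SIZE == 4 and
   NCOPIES == 1 this charges 1 * ceil_log2 (4) * 4 == 8 vec_perm stmts
   for the interleaving below.  */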
68435eb2 1030 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
92345349 1031 stmt_info, 0, vect_body);
ebfd146a 1032
73fbfcad 1033 if (dump_enabled_p ())
78c60e3d 1034 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1035 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 1036 group_size);
ebfd146a
IR
1037 }
1038
cee62fee 1039 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 1040 /* Costs of the stores. */
067bc855
RB
1041 if (memory_access_type == VMAT_ELEMENTWISE
1042 || memory_access_type == VMAT_GATHER_SCATTER)
c5126ce8
RS
1043 {
1044 /* N scalar stores plus extracting the elements. */
1045 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
68435eb2 1046 inside_cost += record_stmt_cost (cost_vec,
c5126ce8
RS
1047 ncopies * assumed_nunits,
1048 scalar_store, stmt_info, 0, vect_body);
1049 }
f2e2a985 1050 else
57c454d2 1051 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
ebfd146a 1052
2de001ee
RS
1053 if (memory_access_type == VMAT_ELEMENTWISE
1054 || memory_access_type == VMAT_STRIDED_SLP)
c5126ce8
RS
1055 {
1056 /* N scalar stores plus extracting the elements. */
1057 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
68435eb2 1058 inside_cost += record_stmt_cost (cost_vec,
c5126ce8
RS
1059 ncopies * assumed_nunits,
1060 vec_to_scalar, stmt_info, 0, vect_body);
1061 }
cee62fee 1062
73fbfcad 1063 if (dump_enabled_p ())
78c60e3d
SS
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "vect_model_store_cost: inside_cost = %d, "
e645e942 1066 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
1067}
1068
1069
720f5239
IR
1070/* Calculate cost of DR's memory access. */
1071void
57c454d2 1072vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
c3e7ee41 1073 unsigned int *inside_cost,
92345349 1074 stmt_vector_for_cost *body_cost_vec)
720f5239 1075{
57c454d2 1076 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
720f5239
IR
1077 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1078
1079 switch (alignment_support_scheme)
1080 {
1081 case dr_aligned:
1082 {
92345349
BS
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 vector_store, stmt_info, 0,
1085 vect_body);
720f5239 1086
73fbfcad 1087 if (dump_enabled_p ())
78c60e3d 1088 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1089 "vect_model_store_cost: aligned.\n");
720f5239
IR
1090 break;
1091 }
1092
1093 case dr_unaligned_supported:
1094 {
720f5239 1095 /* Here, we assign an additional cost for the unaligned store. */
92345349 1096 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1097 unaligned_store, stmt_info,
92345349 1098 DR_MISALIGNMENT (dr), vect_body);
73fbfcad 1099 if (dump_enabled_p ())
78c60e3d
SS
1100 dump_printf_loc (MSG_NOTE, vect_location,
1101 "vect_model_store_cost: unaligned supported by "
e645e942 1102 "hardware.\n");
720f5239
IR
1103 break;
1104 }
1105
38eec4c6
UW
1106 case dr_unaligned_unsupported:
1107 {
1108 *inside_cost = VECT_MAX_COST;
1109
73fbfcad 1110 if (dump_enabled_p ())
78c60e3d 1111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1112 "vect_model_store_cost: unsupported access.\n");
38eec4c6
UW
1113 break;
1114 }
1115
720f5239
IR
1116 default:
1117 gcc_unreachable ();
1118 }
1119}
1120
1121
ebfd146a
IR
1122/* Function vect_model_load_cost
1123
892a981f
RS
1124 Models cost for loads. In the case of grouped accesses, one access has
1125 the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1126 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1127 access scheme chosen. */
1128
68435eb2
RB
1129static void
1130vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
2de001ee 1131 vect_memory_access_type memory_access_type,
68435eb2 1132 slp_instance instance,
2de001ee 1133 slp_tree slp_node,
68435eb2 1134 stmt_vector_for_cost *cost_vec)
ebfd146a 1135{
892a981f 1136 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
92345349 1137 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f 1138 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 1139
68435eb2
RB
1140 gcc_assert (cost_vec);
1141
1142 /* ??? Somehow we need to fix this at the callers. */
1143 if (slp_node)
1144 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1145
1146 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1147 {
1148 /* If the load is permuted then the alignment is determined by
1149 the first group element not by the first scalar stmt DR. */
2c53b149 1150 gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
68435eb2
RB
1151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1152 /* Record the cost for the permutation. */
1153 unsigned n_perms;
1154 unsigned assumed_nunits
1155 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
1156 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1157 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1158 slp_vf, instance, true,
1159 &n_perms);
1160 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1161 stmt_info, 0, vect_body);
1162 /* And adjust the number of loads performed. This handles
1163 redundancies as well as loads that are later dead. */
2c53b149 1164 auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
68435eb2
RB
1165 bitmap_clear (perm);
1166 for (unsigned i = 0;
1167 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1168 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1169 ncopies = 0;
1170 bool load_seen = false;
2c53b149 1171 for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
68435eb2
RB
1172 {
1173 if (i % assumed_nunits == 0)
1174 {
1175 if (load_seen)
1176 ncopies++;
1177 load_seen = false;
1178 }
1179 if (bitmap_bit_p (perm, i))
1180 load_seen = true;
1181 }
1182 if (load_seen)
1183 ncopies++;
1184 gcc_assert (ncopies
2c53b149 1185 <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
68435eb2
RB
1186 + assumed_nunits - 1) / assumed_nunits);
1187 }
1188
892a981f
RS
1189 /* Grouped loads read all elements in the group at once,
1190 so we want the DR for the first statement. */
1191 if (!slp_node && grouped_access_p)
57c454d2 1192 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 1193
892a981f
RS
1194 /* True if we should include any once-per-group costs as well as
1195 the cost of the statement itself. For SLP we only get called
1196 once per group anyhow. */
1197 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1198
272c6793 1199 /* We assume that the cost of a single load-lanes instruction is
2c53b149 1200 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
272c6793 1201 access is instead being provided by a load-and-permute operation,
2de001ee
RS
1202 include the cost of the permutes. */
1203 if (first_stmt_p
1204 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1205 {
2c23db6d
ES
1206 /* Uses an even and odd extract operations or shuffle operations
1207 for each needed permute. */
2c53b149 1208 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2c23db6d 1209 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
68435eb2
RB
1210 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1211 stmt_info, 0, vect_body);
ebfd146a 1212
73fbfcad 1213 if (dump_enabled_p ())
e645e942
TJ
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1216 group_size);
ebfd146a
IR
1217 }
1218
1219 /* The loads themselves. */
067bc855
RB
1220 if (memory_access_type == VMAT_ELEMENTWISE
1221 || memory_access_type == VMAT_GATHER_SCATTER)
a82960aa 1222 {
a21892ad
BS
1223 /* N scalar loads plus gathering them into a vector. */
1224 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
c5126ce8 1225 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
68435eb2 1226 inside_cost += record_stmt_cost (cost_vec,
c5126ce8 1227 ncopies * assumed_nunits,
92345349 1228 scalar_load, stmt_info, 0, vect_body);
a82960aa
RG
1229 }
1230 else
57c454d2 1231 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
92345349 1232 &inside_cost, &prologue_cost,
68435eb2 1233 cost_vec, cost_vec, true);
2de001ee
RS
1234 if (memory_access_type == VMAT_ELEMENTWISE
1235 || memory_access_type == VMAT_STRIDED_SLP)
68435eb2 1236 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
892a981f 1237 stmt_info, 0, vect_body);
720f5239 1238
73fbfcad 1239 if (dump_enabled_p ())
78c60e3d
SS
1240 dump_printf_loc (MSG_NOTE, vect_location,
1241 "vect_model_load_cost: inside_cost = %d, "
e645e942 1242 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1243}
1244
1245
1246/* Calculate cost of DR's memory access. */
1247void
57c454d2 1248vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
c3e7ee41 1249 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1250 unsigned int *prologue_cost,
1251 stmt_vector_for_cost *prologue_cost_vec,
1252 stmt_vector_for_cost *body_cost_vec,
1253 bool record_prologue_costs)
720f5239 1254{
57c454d2 1255 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
720f5239
IR
1256 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1257
1258 switch (alignment_support_scheme)
ebfd146a
IR
1259 {
1260 case dr_aligned:
1261 {
92345349
BS
1262 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1263 stmt_info, 0, vect_body);
ebfd146a 1264
73fbfcad 1265 if (dump_enabled_p ())
78c60e3d 1266 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1267 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1268
1269 break;
1270 }
1271 case dr_unaligned_supported:
1272 {
720f5239 1273 /* Here, we assign an additional cost for the unaligned load. */
92345349 1274 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1275 unaligned_load, stmt_info,
92345349 1276 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1277
73fbfcad 1278 if (dump_enabled_p ())
78c60e3d
SS
1279 dump_printf_loc (MSG_NOTE, vect_location,
1280 "vect_model_load_cost: unaligned supported by "
e645e942 1281 "hardware.\n");
ebfd146a
IR
1282
1283 break;
1284 }
1285 case dr_explicit_realign:
1286 {
92345349
BS
1287 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1288 vector_load, stmt_info, 0, vect_body);
1289 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1290 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1291
1292 /* FIXME: If the misalignment remains fixed across the iterations of
1293 the containing loop, the following cost should be added to the
92345349 1294 prologue costs. */
ebfd146a 1295 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1296 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1297 stmt_info, 0, vect_body);
ebfd146a 1298
73fbfcad 1299 if (dump_enabled_p ())
e645e942
TJ
1300 dump_printf_loc (MSG_NOTE, vect_location,
1301 "vect_model_load_cost: explicit realign\n");
8bd37302 1302
ebfd146a
IR
1303 break;
1304 }
1305 case dr_explicit_realign_optimized:
1306 {
73fbfcad 1307 if (dump_enabled_p ())
e645e942 1308 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1309 "vect_model_load_cost: unaligned software "
e645e942 1310 "pipelined.\n");
ebfd146a
IR
1311
1312 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1313 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1314 if this is an access in a group of loads, which provide grouped
ebfd146a 1315 access, then the above cost should only be considered for one
ff802fa1 1316 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1317 and a realignment op. */
1318
92345349 1319 if (add_realign_cost && record_prologue_costs)
ebfd146a 1320 {
92345349
BS
1321 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1322 vector_stmt, stmt_info,
1323 0, vect_prologue);
ebfd146a 1324 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1325 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1326 vector_stmt, stmt_info,
1327 0, vect_prologue);
ebfd146a
IR
1328 }
1329
92345349
BS
1330 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1331 stmt_info, 0, vect_body);
1332 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1333 stmt_info, 0, vect_body);
8bd37302 1334
73fbfcad 1335 if (dump_enabled_p ())
78c60e3d 1336 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1337 "vect_model_load_cost: explicit realign optimized"
1338 "\n");
8bd37302 1339
ebfd146a
IR
1340 break;
1341 }
1342
38eec4c6
UW
1343 case dr_unaligned_unsupported:
1344 {
1345 *inside_cost = VECT_MAX_COST;
1346
73fbfcad 1347 if (dump_enabled_p ())
78c60e3d 1348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1349 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1350 break;
1351 }
1352
ebfd146a
IR
1353 default:
1354 gcc_unreachable ();
1355 }
ebfd146a
IR
1356}
1357
418b7df3
RG
1358/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1359 the loop preheader for the vectorized stmt STMT. */
ebfd146a 1360
418b7df3 1361static void
355fe088 1362vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
ebfd146a 1363{
ebfd146a 1364 if (gsi)
418b7df3 1365 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1366 else
1367 {
418b7df3 1368 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a 1369 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
b8698a0f 1370
a70d6342
IR
1371 if (loop_vinfo)
1372 {
1373 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
418b7df3
RG
1374 basic_block new_bb;
1375 edge pe;
a70d6342
IR
1376
1377 if (nested_in_vect_loop_p (loop, stmt))
1378 loop = loop->inner;
b8698a0f 1379
a70d6342 1380 pe = loop_preheader_edge (loop);
418b7df3 1381 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
a70d6342
IR
1382 gcc_assert (!new_bb);
1383 }
1384 else
1385 {
1386 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1387 basic_block bb;
1388 gimple_stmt_iterator gsi_bb_start;
1389
1390 gcc_assert (bb_vinfo);
1391 bb = BB_VINFO_BB (bb_vinfo);
12aaf609 1392 gsi_bb_start = gsi_after_labels (bb);
418b7df3 1393 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
a70d6342 1394 }
ebfd146a
IR
1395 }
1396
73fbfcad 1397 if (dump_enabled_p ())
ebfd146a 1398 {
78c60e3d
SS
1399 dump_printf_loc (MSG_NOTE, vect_location,
1400 "created new init_stmt: ");
1401 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
ebfd146a 1402 }
418b7df3
RG
1403}
1404
1405/* Function vect_init_vector.
ebfd146a 1406
5467ee52
RG
1407 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1408 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1409 vector type a vector with all elements equal to VAL is created first.
1410 Place the initialization at BSI if it is not NULL. Otherwise, place the
1411 initialization at the loop preheader.
418b7df3
RG
1412 Return the DEF of INIT_STMT.
1413 It will be used in the vectorization of STMT. */
1414
1415tree
355fe088 1416vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
418b7df3 1417{
355fe088 1418 gimple *init_stmt;
418b7df3
RG
1419 tree new_temp;
1420
e412ece4
RB
1421 /* We abuse this function to push something to an SSA name with initial 'val'. */
1422 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
418b7df3 1423 {
e412ece4
RB
1424 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1425 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
418b7df3 1426 {
5a308cf1
IE
1427 /* Scalar boolean value should be transformed into
1428 all zeros or all ones value before building a vector. */
1429 if (VECTOR_BOOLEAN_TYPE_P (type))
1430 {
b3d51f23
IE
1431 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1432 tree false_val = build_zero_cst (TREE_TYPE (type));
5a308cf1
IE
1433
1434 if (CONSTANT_CLASS_P (val))
1435 val = integer_zerop (val) ? false_val : true_val;
1436 else
1437 {
1438 new_temp = make_ssa_name (TREE_TYPE (type));
1439 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1440 val, true_val, false_val);
1441 vect_init_vector_1 (stmt, init_stmt, gsi);
1442 val = new_temp;
1443 }
1444 }
1445 else if (CONSTANT_CLASS_P (val))
42fd8198 1446 val = fold_convert (TREE_TYPE (type), val);
418b7df3
RG
1447 else
1448 {
b731b390 1449 new_temp = make_ssa_name (TREE_TYPE (type));
e412ece4
RB
1450 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1451 init_stmt = gimple_build_assign (new_temp,
1452 fold_build1 (VIEW_CONVERT_EXPR,
1453 TREE_TYPE (type),
1454 val));
1455 else
1456 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
418b7df3 1457 vect_init_vector_1 (stmt, init_stmt, gsi);
5467ee52 1458 val = new_temp;
418b7df3
RG
1459 }
1460 }
5467ee52 1461 val = build_vector_from_val (type, val);
418b7df3
RG
1462 }
1463
0e22bb5a
RB
1464 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1465 init_stmt = gimple_build_assign (new_temp, val);
418b7df3 1466 vect_init_vector_1 (stmt, init_stmt, gsi);
0e22bb5a 1467 return new_temp;
ebfd146a
IR
1468}
1469
c83a894c 1470/* Function vect_get_vec_def_for_operand_1.
a70d6342 1471
c83a894c
AH
1472 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1473 DT that will be used in the vectorized stmt. */
ebfd146a
IR
1474
1475tree
c83a894c 1476vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
ebfd146a
IR
1477{
1478 tree vec_oprnd;
355fe088 1479 gimple *vec_stmt;
ebfd146a 1480 stmt_vec_info def_stmt_info = NULL;
ebfd146a
IR
1481
1482 switch (dt)
1483 {
81c40241 1484 /* operand is a constant or a loop invariant. */
ebfd146a 1485 case vect_constant_def:
81c40241 1486 case vect_external_def:
c83a894c
AH
1487 /* Code should use vect_get_vec_def_for_operand. */
1488 gcc_unreachable ();
ebfd146a 1489
81c40241 1490 /* operand is defined inside the loop. */
8644a673 1491 case vect_internal_def:
ebfd146a 1492 {
ebfd146a
IR
1493 /* Get the def from the vectorized stmt. */
1494 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1495
ebfd146a 1496 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1497 /* Get vectorized pattern statement. */
1498 if (!vec_stmt
1499 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1500 && !STMT_VINFO_RELEVANT (def_stmt_info))
1501 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1502 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1503 gcc_assert (vec_stmt);
1504 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1505 vec_oprnd = PHI_RESULT (vec_stmt);
1506 else if (is_gimple_call (vec_stmt))
1507 vec_oprnd = gimple_call_lhs (vec_stmt);
1508 else
1509 vec_oprnd = gimple_assign_lhs (vec_stmt);
1510 return vec_oprnd;
1511 }
1512
c78e3652 1513 /* operand is defined by a loop header phi. */
ebfd146a 1514 case vect_reduction_def:
06066f92 1515 case vect_double_reduction_def:
7c5222ff 1516 case vect_nested_cycle:
ebfd146a
IR
1517 case vect_induction_def:
1518 {
1519 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1520
1521 /* Get the def from the vectorized stmt. */
1522 def_stmt_info = vinfo_for_stmt (def_stmt);
1523 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1524 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1525 vec_oprnd = PHI_RESULT (vec_stmt);
1526 else
1527 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1528 return vec_oprnd;
1529 }
1530
1531 default:
1532 gcc_unreachable ();
1533 }
1534}
1535
1536
c83a894c
AH
1537/* Function vect_get_vec_def_for_operand.
1538
1539 OP is an operand in STMT. This function returns a (vector) def that will be
1540 used in the vectorized stmt for STMT.
1541
1542 In the case that OP is an SSA_NAME which is defined in the loop, then
1543 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1544
1545 In case OP is an invariant or constant, a new stmt that creates a vector def
1546 needs to be introduced. VECTYPE may be used to specify a required type for
 1547	   the vector invariant.  */
1548
1549tree
1550vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1551{
1552 gimple *def_stmt;
1553 enum vect_def_type dt;
1554 bool is_simple_use;
1555 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1557
1558 if (dump_enabled_p ())
1559 {
1560 dump_printf_loc (MSG_NOTE, vect_location,
1561 "vect_get_vec_def_for_operand: ");
1562 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1563 dump_printf (MSG_NOTE, "\n");
1564 }
1565
894dd753 1566 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, &def_stmt);
c83a894c
AH
1567 gcc_assert (is_simple_use);
1568 if (def_stmt && dump_enabled_p ())
1569 {
1570 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1572 }
1573
1574 if (dt == vect_constant_def || dt == vect_external_def)
1575 {
1576 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1577 tree vector_type;
1578
1579 if (vectype)
1580 vector_type = vectype;
2568d8a1 1581 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1582 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1583 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1584 else
1585 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1586
1587 gcc_assert (vector_type);
1588 return vect_init_vector (stmt, op, vector_type, NULL);
1589 }
1590 else
1591 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1592}
1593
1594
ebfd146a
IR
1595/* Function vect_get_vec_def_for_stmt_copy
1596
ff802fa1 1597 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1598 vectorized stmt to be created (by the caller to this function) is a "copy"
1599 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1600 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1601 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1602 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1603 DT is the type of the vector def VEC_OPRND.
1604
1605 Context:
1606 In case the vectorization factor (VF) is bigger than the number
1607 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1608 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1609 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1610 smallest data-type determines the VF, and as a result, when vectorizing
1611 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1612 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1613 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1614 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1615 which VF=16 and nunits=4, so the number of copies required is 4):
1616
1617 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1618
ebfd146a
IR
1619 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1620 VS1.1: vx.1 = memref1 VS1.2
1621 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1622 VS1.3: vx.3 = memref3
ebfd146a
IR
1623
1624 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1625 VSnew.1: vz1 = vx.1 + ... VSnew.2
1626 VSnew.2: vz2 = vx.2 + ... VSnew.3
1627 VSnew.3: vz3 = vx.3 + ...
1628
1629 The vectorization of S1 is explained in vectorizable_load.
1630 The vectorization of S2:
b8698a0f
L
1631 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1632 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1633 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1634 returns the vector-def 'vx.0'.
1635
b8698a0f
L
1636 To create the remaining copies of the vector-stmt (VSnew.j), this
1637 function is called to get the relevant vector-def for each operand. It is
1638 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1639 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1640
b8698a0f
L
1641 For example, to obtain the vector-def 'vx.1' in order to create the
1642 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
 1643	   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1644 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1645 and return its def ('vx.1').
1646 Overall, to create the above sequence this function will be called 3 times:
1647 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1648 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1649 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1650
1651tree
1652vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1653{
355fe088 1654 gimple *vec_stmt_for_operand;
ebfd146a
IR
1655 stmt_vec_info def_stmt_info;
1656
1657 /* Do nothing; can reuse same def. */
8644a673 1658 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1659 return vec_oprnd;
1660
1661 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1662 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1663 gcc_assert (def_stmt_info);
1664 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1665 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1666 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1667 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1668 else
1669 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1670 return vec_oprnd;
1671}
1672
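/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  It only
   restates the "Context" arithmetic from the comment above: with a
   vectorization factor VF and NUNITS elements per vector, a scalar stmt
   needs VF / NUNITS vector copies, and each copy after the first obtains
   its operands through vect_get_vec_def_for_stmt_copy.  The numbers are
   the hypothetical ones used in the illustration (VF = 16, nunits = 4).  */
#include <stdio.h>

int
main (void)
{
  unsigned int vf = 16, nunits = 4;
  unsigned int ncopies = vf / nunits;
  printf ("ncopies = %u\n", ncopies);	/* 4: VSnew.0 .. VSnew.3 */
  return 0;
}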
1673
1674/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1675 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1676
c78e3652 1677void
b8698a0f 1678vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1679 vec<tree> *vec_oprnds0,
1680 vec<tree> *vec_oprnds1)
ebfd146a 1681{
9771b263 1682 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1683
1684 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1685 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1686
9771b263 1687 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1688 {
9771b263 1689 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1690 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1691 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1692 }
1693}
1694
1695
c78e3652 1696/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1697
c78e3652 1698void
355fe088 1699vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1700 vec<tree> *vec_oprnds0,
1701 vec<tree> *vec_oprnds1,
306b0c92 1702 slp_tree slp_node)
ebfd146a
IR
1703{
1704 if (slp_node)
d092494c
IR
1705 {
1706 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1707 auto_vec<tree> ops (nops);
1708 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1709
9771b263 1710 ops.quick_push (op0);
d092494c 1711 if (op1)
9771b263 1712 ops.quick_push (op1);
d092494c 1713
306b0c92 1714 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1715
37b5ec8f 1716 *vec_oprnds0 = vec_defs[0];
d092494c 1717 if (op1)
37b5ec8f 1718 *vec_oprnds1 = vec_defs[1];
d092494c 1719 }
ebfd146a
IR
1720 else
1721 {
1722 tree vec_oprnd;
1723
9771b263 1724 vec_oprnds0->create (1);
81c40241 1725 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1726 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1727
1728 if (op1)
1729 {
9771b263 1730 vec_oprnds1->create (1);
81c40241 1731 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1732 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1733 }
1734 }
1735}
1736
bb6c2b68
RS
1737/* Helper function called by vect_finish_replace_stmt and
1738 vect_finish_stmt_generation. Set the location of the new
1739 statement and create a stmt_vec_info for it. */
1740
1741static void
1742vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1743{
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1746
1747 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1748
1749 if (dump_enabled_p ())
1750 {
1751 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1753 }
1754
1755 gimple_set_location (vec_stmt, gimple_location (stmt));
1756
1757 /* While EH edges will generally prevent vectorization, stmt might
1758 e.g. be in a must-not-throw region. Ensure newly created stmts
1759 that could throw are part of the same region. */
1760 int lp_nr = lookup_stmt_eh_lp (stmt);
1761 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1762 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1763}
1764
1765/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1766 which sets the same scalar result as STMT did. */
1767
1768void
1769vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1770{
1771 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1772
1773 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1774 gsi_replace (&gsi, vec_stmt, false);
1775
1776 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1777}
ebfd146a
IR
1778
1779/* Function vect_finish_stmt_generation.
1780
1781 Insert a new stmt. */
1782
1783void
355fe088 1784vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1785 gimple_stmt_iterator *gsi)
1786{
ebfd146a
IR
1787 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1788
54e8e2c3
RG
1789 if (!gsi_end_p (*gsi)
1790 && gimple_has_mem_ops (vec_stmt))
1791 {
355fe088 1792 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1793 tree vuse = gimple_vuse (at_stmt);
1794 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1795 {
1796 tree vdef = gimple_vdef (at_stmt);
1797 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1798 /* If we have an SSA vuse and insert a store, update virtual
1799 SSA form to avoid triggering the renamer. Do so only
1800 if we can easily see all uses - which is what almost always
1801 happens with the way vectorized stmts are inserted. */
1802 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1803 && ((is_gimple_assign (vec_stmt)
1804 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1805 || (is_gimple_call (vec_stmt)
1806 && !(gimple_call_flags (vec_stmt)
1807 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1808 {
1809 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1810 gimple_set_vdef (vec_stmt, new_vdef);
1811 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1812 }
1813 }
1814 }
ebfd146a 1815 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1816 vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1817}
1818
70439f0d
RS
1819/* We want to vectorize a call to combined function CFN with function
1820 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1821 as the types of all inputs. Check whether this is possible using
1822 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1823
70439f0d
RS
1824static internal_fn
1825vectorizable_internal_function (combined_fn cfn, tree fndecl,
1826 tree vectype_out, tree vectype_in)
ebfd146a 1827{
70439f0d
RS
1828 internal_fn ifn;
1829 if (internal_fn_p (cfn))
1830 ifn = as_internal_fn (cfn);
1831 else
1832 ifn = associated_internal_fn (fndecl);
1833 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1834 {
1835 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1836 if (info.vectorizable)
1837 {
1838 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1839 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1840 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1841 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1842 return ifn;
1843 }
1844 }
1845 return IFN_LAST;
ebfd146a
IR
1846}
1847
5ce9450f 1848
355fe088 1849static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1850 gimple_stmt_iterator *);
1851
7cfb4d93
RS
1852/* Check whether a load or store statement in the loop described by
1853 LOOP_VINFO is possible in a fully-masked loop. This is testing
1854 whether the vectorizer pass has the appropriate support, as well as
1855 whether the target does.
1856
1857 VLS_TYPE says whether the statement is a load or store and VECTYPE
1858 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1859 says how the load or store is going to be implemented and GROUP_SIZE
1860 is the number of load or store statements in the containing group.
bfaa08b7
RS
1861 If the access is a gather load or scatter store, GS_INFO describes
1862 its arguments.
7cfb4d93
RS
1863
1864 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1865 supported, otherwise record the required mask types. */
1866
1867static void
1868check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1869 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1870 vect_memory_access_type memory_access_type,
1871 gather_scatter_info *gs_info)
7cfb4d93
RS
1872{
1873 /* Invariant loads need no special support. */
1874 if (memory_access_type == VMAT_INVARIANT)
1875 return;
1876
1877 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1878 machine_mode vecmode = TYPE_MODE (vectype);
1879 bool is_load = (vls_type == VLS_LOAD);
1880 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1881 {
1882 if (is_load
1883 ? !vect_load_lanes_supported (vectype, group_size, true)
1884 : !vect_store_lanes_supported (vectype, group_size, true))
1885 {
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "can't use a fully-masked loop because the"
1889 " target doesn't have an appropriate masked"
1890 " load/store-lanes instruction.\n");
1891 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1892 return;
1893 }
1894 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1895 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1896 return;
1897 }
1898
bfaa08b7
RS
1899 if (memory_access_type == VMAT_GATHER_SCATTER)
1900 {
f307441a
RS
1901 internal_fn ifn = (is_load
1902 ? IFN_MASK_GATHER_LOAD
1903 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1904 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1905 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1906 gs_info->memory_type,
1907 TYPE_SIGN (offset_type),
1908 gs_info->scale))
1909 {
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "can't use a fully-masked loop because the"
1913 " target doesn't have an appropriate masked"
f307441a 1914 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1915 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1916 return;
1917 }
1918 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1919 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1920 return;
1921 }
1922
7cfb4d93
RS
1923 if (memory_access_type != VMAT_CONTIGUOUS
1924 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1925 {
1926 /* Element X of the data must come from iteration i * VF + X of the
1927 scalar loop. We need more work to support other mappings. */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1930 "can't use a fully-masked loop because an access"
1931 " isn't contiguous.\n");
1932 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1933 return;
1934 }
1935
1936 machine_mode mask_mode;
1937 if (!(targetm.vectorize.get_mask_mode
1938 (GET_MODE_NUNITS (vecmode),
1939 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1940 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1941 {
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "can't use a fully-masked loop because the target"
1945 " doesn't have the appropriate masked load or"
1946 " store.\n");
1947 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1948 return;
1949 }
1950 /* We might load more scalars than we need for permuting SLP loads.
1951 We checked in get_group_load_store_type that the extra elements
1952 don't leak into a new vector. */
1953 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1954 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1955 unsigned int nvectors;
1956 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1957 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1958 else
1959 gcc_unreachable ();
1960}
1961
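/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  A minimal
   model of the mask-count computation at the end of the function above:
   the number of masks recorded is the number of vectors needed to cover
   GROUP_SIZE * VF scalars, i.e. the rounded-up quotient that
   can_div_away_from_zero_p produces when the values are constant.  The
   function name and the values in main are hypothetical.  */
#include <stdio.h>

static unsigned int
nvectors_for_masks (unsigned int group_size, unsigned int vf,
		    unsigned int nunits)
{
  /* Ceiling division for the constant-sized case.  */
  return (group_size * vf + nunits - 1) / nunits;
}

int
main (void)
{
  /* E.g. a group of 3 accesses, VF = 4, 8 elements per vector:
     12 scalars need ceil (12 / 8) = 2 masked vectors per copy.  */
  printf ("%u\n", nvectors_for_masks (3, 4, 8));
  return 0;
}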
1962/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1963 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1964 that needs to be applied to all loads and stores in a vectorized loop.
1965 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1966
1967 MASK_TYPE is the type of both masks. If new statements are needed,
1968 insert them before GSI. */
1969
1970static tree
1971prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1972 gimple_stmt_iterator *gsi)
1973{
1974 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1975 if (!loop_mask)
1976 return vec_mask;
1977
1978 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1979 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1980 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1981 vec_mask, loop_mask);
1982 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1983 return and_res;
1984}
1985
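/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  A scalar
   model of the BIT_AND_EXPR that prepare_load_store_mask emits: each lane
   of the final mask is active only when both the user condition and the
   loop mask (the "are we still within the iteration count?" predicate)
   are true.  All names and values here are hypothetical.  */
#include <stdbool.h>
#include <stdio.h>

static void
combine_masks (const bool *vec_mask, const bool *loop_mask,
	       bool *out, unsigned int n)
{
  for (unsigned int i = 0; i < n; ++i)
    out[i] = vec_mask[i] && loop_mask[i];
}

int
main (void)
{
  bool cond[4] = { true, false, true, true };
  bool loop[4] = { true, true, true, false };	/* last lane is a tail lane */
  bool out[4];
  combine_masks (cond, loop, out, 4);
  for (int i = 0; i < 4; ++i)
    printf ("%d ", out[i]);			/* prints 1 0 1 0 */
  printf ("\n");
  return 0;
}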
429ef523
RS
1986/* Determine whether we can use a gather load or scatter store to vectorize
1987 strided load or store STMT by truncating the current offset to a smaller
1988 width. We need to be able to construct an offset vector:
1989
1990 { 0, X, X*2, X*3, ... }
1991
1992 without loss of precision, where X is STMT's DR_STEP.
1993
1994 Return true if this is possible, describing the gather load or scatter
1995 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1996
1997static bool
1998vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1999 bool masked_p,
2000 gather_scatter_info *gs_info)
2001{
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2004 tree step = DR_STEP (dr);
2005 if (TREE_CODE (step) != INTEGER_CST)
2006 {
2007 /* ??? Perhaps we could use range information here? */
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_NOTE, vect_location,
2010 "cannot truncate variable step.\n");
2011 return false;
2012 }
2013
2014 /* Get the number of bits in an element. */
2015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2016 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2017 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2018
2019 /* Set COUNT to the upper limit on the number of elements - 1.
2020 Start with the maximum vectorization factor. */
2021 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2022
2023 /* Try lowering COUNT to the number of scalar latch iterations. */
2024 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2025 widest_int max_iters;
2026 if (max_loop_iterations (loop, &max_iters)
2027 && max_iters < count)
2028 count = max_iters.to_shwi ();
2029
2030 /* Try scales of 1 and the element size. */
2031 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2032 bool overflow_p = false;
2033 for (int i = 0; i < 2; ++i)
2034 {
2035 int scale = scales[i];
2036 widest_int factor;
2037 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2038 continue;
2039
 2040	      /* See whether we can calculate COUNT * STEP / SCALE
 2041		 in ELEMENT_BITS bits.  */
2042 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
2043 if (overflow_p)
2044 continue;
2045 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2046 if (wi::min_precision (range, sign) > element_bits)
2047 {
2048 overflow_p = true;
2049 continue;
2050 }
2051
2052 /* See whether the target supports the operation. */
2053 tree memory_type = TREE_TYPE (DR_REF (dr));
2054 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2055 memory_type, element_bits, sign, scale,
2056 &gs_info->ifn, &gs_info->element_type))
2057 continue;
2058
2059 tree offset_type = build_nonstandard_integer_type (element_bits,
2060 sign == UNSIGNED);
2061
2062 gs_info->decl = NULL_TREE;
2063 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2064 but we don't need to store that here. */
2065 gs_info->base = NULL_TREE;
2066 gs_info->offset = fold_convert (offset_type, step);
929b4411 2067 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2068 gs_info->offset_vectype = NULL_TREE;
2069 gs_info->scale = scale;
2070 gs_info->memory_type = memory_type;
2071 return true;
2072 }
2073
2074 if (overflow_p && dump_enabled_p ())
2075 dump_printf_loc (MSG_NOTE, vect_location,
2076 "truncating gather/scatter offset to %d bits"
2077 " might change its value.\n", element_bits);
2078
2079 return false;
2080}
2081
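/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  A rough
   64-bit model of the range check in vect_truncate_gather_scatter_offset:
   the largest offset the loop can produce is COUNT * (STEP / SCALE), and
   truncating the offset to ELEMENT_BITS is only safe if that value is
   representable in that many bits.  This ignores the widest-int machinery;
   the helper names and the values in main are hypothetical.  */
#include <stdint.h>
#include <stdio.h>

static unsigned int
min_precision (int64_t value)
{
  /* Bits needed for VALUE: unsigned precision if non-negative, signed
     otherwise -- a simplified stand-in for wi::min_precision.  */
  unsigned int bits = 0;
  uint64_t mag = value >= 0 ? (uint64_t) value : (uint64_t) (-(value + 1));
  while (mag)
    {
      ++bits;
      mag >>= 1;
    }
  return value >= 0 ? bits : bits + 1;
}

static int
offset_fits_p (int64_t count, int64_t step, int64_t scale,
	       unsigned int element_bits)
{
  if (step % scale != 0)
    return 0;
  int64_t range = count * (step / scale);
  return min_precision (range) <= element_bits;
}

int
main (void)
{
  /* count = 255 (upper bound on elements - 1), DR_STEP = 16, 8-bit
     elements: a scale of 16 gives offsets 0..255 (fits), a scale of 1
     would need 0..4080 (does not).  */
  printf ("%d %d\n",
	  offset_fits_p (255, 16, 16, 8),
	  offset_fits_p (255, 16, 1, 8));	/* prints "1 0" */
  return 0;
}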
ab2fc782
RS
2082/* Return true if we can use gather/scatter internal functions to
2083 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
 2084	   MASKED_P is true if the load or store is conditional.  When returning
2085 true, fill in GS_INFO with the information required to perform the
2086 operation. */
ab2fc782
RS
2087
2088static bool
2089vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2090 bool masked_p,
ab2fc782
RS
2091 gather_scatter_info *gs_info)
2092{
2093 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2094 || gs_info->decl)
429ef523
RS
2095 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2096 masked_p, gs_info);
ab2fc782
RS
2097
2098 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2099 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2100 tree offset_type = TREE_TYPE (gs_info->offset);
2101 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2102
2103 /* Enforced by vect_check_gather_scatter. */
2104 gcc_assert (element_bits >= offset_bits);
2105
2106 /* If the elements are wider than the offset, convert the offset to the
2107 same width, without changing its sign. */
2108 if (element_bits > offset_bits)
2109 {
2110 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2111 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2112 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2113 }
2114
2115 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_NOTE, vect_location,
2117 "using gather/scatter for strided/grouped access,"
2118 " scale = %d\n", gs_info->scale);
2119
2120 return true;
2121}
2122
62da9e14
RS
2123/* STMT is a non-strided load or store, meaning that it accesses
2124 elements with a known constant step. Return -1 if that step
2125 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2126
2127static int
2128compare_step_with_zero (gimple *stmt)
2129{
2130 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2131 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2132 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2133 size_zero_node);
62da9e14
RS
2134}
2135
2136/* If the target supports a permute mask that reverses the elements in
2137 a vector of type VECTYPE, return that mask, otherwise return null. */
2138
2139static tree
2140perm_mask_for_reverse (tree vectype)
2141{
928686b1 2142 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2143
d980067b
RS
2144 /* The encoding has a single stepped pattern. */
2145 vec_perm_builder sel (nunits, 1, 3);
928686b1 2146 for (int i = 0; i < 3; ++i)
908a1a16 2147 sel.quick_push (nunits - 1 - i);
62da9e14 2148
e3342de4
RS
2149 vec_perm_indices indices (sel, 1, nunits);
2150 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2151 return NULL_TREE;
e3342de4 2152 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2153}
5ce9450f 2154
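/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  It prints
   the full selector that the stepped encoding in perm_mask_for_reverse
   stands for: for a vector of NUNITS elements the reversing permutation is
   { NUNITS-1, NUNITS-2, ..., 0 }.  The function above only pushes the first
   three elements because the single stepped pattern is extended
   automatically by the vec_perm_builder encoding.  NUNITS = 8 here is a
   hypothetical example.  */
#include <stdio.h>

int
main (void)
{
  unsigned int nunits = 8;
  for (unsigned int i = 0; i < nunits; ++i)
    printf ("%u ", nunits - 1 - i);	/* prints 7 6 5 4 3 2 1 0 */
  printf ("\n");
  return 0;
}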
c3a8f964
RS
2155/* STMT is either a masked or unconditional store. Return the value
2156 being stored. */
2157
f307441a 2158tree
c3a8f964
RS
2159vect_get_store_rhs (gimple *stmt)
2160{
2161 if (gassign *assign = dyn_cast <gassign *> (stmt))
2162 {
2163 gcc_assert (gimple_assign_single_p (assign));
2164 return gimple_assign_rhs1 (assign);
2165 }
2166 if (gcall *call = dyn_cast <gcall *> (stmt))
2167 {
2168 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2169 int index = internal_fn_stored_value_index (ifn);
2170 gcc_assert (index >= 0);
2171 return gimple_call_arg (stmt, index);
c3a8f964
RS
2172 }
2173 gcc_unreachable ();
2174}
2175
2de001ee
RS
2176/* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT is part of a grouped load
2178 or store.
2179
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2184
2185static bool
2186get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2187 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2188 vect_memory_access_type *memory_access_type,
2189 gather_scatter_info *gs_info)
2de001ee
RS
2190{
2191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2192 vec_info *vinfo = stmt_info->vinfo;
2193 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2194 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2c53b149 2195 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2196 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 2197 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2de001ee 2198 bool single_element_p = (stmt == first_stmt
2c53b149
RB
2199 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2200 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2201 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2202
2203 /* True if the vectorized statements would access beyond the last
2204 statement in the group. */
2205 bool overrun_p = false;
2206
2207 /* True if we can cope with such overrun by peeling for gaps, so that
2208 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2209 bool can_overrun_p = (!masked_p
2210 && vls_type == VLS_LOAD
2211 && loop_vinfo
2212 && !loop->inner);
2de001ee
RS
2213
2214 /* There can only be a gap at the end of the group if the stride is
2215 known at compile time. */
2216 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2217
2218 /* Stores can't yet have gaps. */
2219 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2220
2221 if (slp)
2222 {
2223 if (STMT_VINFO_STRIDED_P (stmt_info))
2224 {
2c53b149 2225 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2226 separated by the stride, until we have a complete vector.
2227 Fall back to scalar accesses if that isn't possible. */
928686b1 2228 if (multiple_p (nunits, group_size))
2de001ee
RS
2229 *memory_access_type = VMAT_STRIDED_SLP;
2230 else
2231 *memory_access_type = VMAT_ELEMENTWISE;
2232 }
2233 else
2234 {
2235 overrun_p = loop_vinfo && gap != 0;
2236 if (overrun_p && vls_type != VLS_LOAD)
2237 {
2238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2239 "Grouped store with gaps requires"
2240 " non-consecutive accesses\n");
2241 return false;
2242 }
f702e7d4
RS
2243 /* An overrun is fine if the trailing elements are smaller
2244 than the alignment boundary B. Every vector access will
2245 be a multiple of B and so we are guaranteed to access a
2246 non-gap element in the same B-sized block. */
f9ef2c76 2247 if (overrun_p
f702e7d4
RS
2248 && gap < (vect_known_alignment_in_bytes (first_dr)
2249 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2250 overrun_p = false;
2de001ee
RS
2251 if (overrun_p && !can_overrun_p)
2252 {
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "Peeling for outer loop is not supported\n");
2256 return false;
2257 }
2258 *memory_access_type = VMAT_CONTIGUOUS;
2259 }
2260 }
2261 else
2262 {
2263 /* We can always handle this case using elementwise accesses,
2264 but see if something more efficient is available. */
2265 *memory_access_type = VMAT_ELEMENTWISE;
2266
2267 /* If there is a gap at the end of the group then these optimizations
2268 would access excess elements in the last iteration. */
2269 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2270 /* An overrun is fine if the trailing elements are smaller than the
2271 alignment boundary B. Every vector access will be a multiple of B
2272 and so we are guaranteed to access a non-gap element in the
2273 same B-sized block. */
f9ef2c76 2274 if (would_overrun_p
7e11fc7f 2275 && !masked_p
f702e7d4
RS
2276 && gap < (vect_known_alignment_in_bytes (first_dr)
2277 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2278 would_overrun_p = false;
f702e7d4 2279
2de001ee 2280 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2281 && (can_overrun_p || !would_overrun_p)
2282 && compare_step_with_zero (stmt) > 0)
2de001ee 2283 {
6737facb
RS
2284 /* First cope with the degenerate case of a single-element
2285 vector. */
2286 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2287 *memory_access_type = VMAT_CONTIGUOUS;
2288
2289 /* Otherwise try using LOAD/STORE_LANES. */
2290 if (*memory_access_type == VMAT_ELEMENTWISE
2291 && (vls_type == VLS_LOAD
7e11fc7f
RS
2292 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2293 : vect_store_lanes_supported (vectype, group_size,
2294 masked_p)))
2de001ee
RS
2295 {
2296 *memory_access_type = VMAT_LOAD_STORE_LANES;
2297 overrun_p = would_overrun_p;
2298 }
2299
2300 /* If that fails, try using permuting loads. */
2301 if (*memory_access_type == VMAT_ELEMENTWISE
2302 && (vls_type == VLS_LOAD
2303 ? vect_grouped_load_supported (vectype, single_element_p,
2304 group_size)
2305 : vect_grouped_store_supported (vectype, group_size)))
2306 {
2307 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2308 overrun_p = would_overrun_p;
2309 }
2310 }
429ef523
RS
2311
 2312      /* As a last resort, try using a gather load or scatter store.
2313
2314 ??? Although the code can handle all group sizes correctly,
2315 it probably isn't a win to use separate strided accesses based
2316 on nearby locations. Or, even if it's a win over scalar code,
2317 it might not be a win over vectorizing at a lower VF, if that
2318 allows us to use contiguous accesses. */
2319 if (*memory_access_type == VMAT_ELEMENTWISE
2320 && single_element_p
2321 && loop_vinfo
2322 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2323 masked_p, gs_info))
2324 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2325 }
2326
2327 if (vls_type != VLS_LOAD && first_stmt == stmt)
2328 {
2329 /* STMT is the leader of the group. Check the operands of all the
2330 stmts of the group. */
2c53b149 2331 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2de001ee
RS
2332 while (next_stmt)
2333 {
7e11fc7f 2334 tree op = vect_get_store_rhs (next_stmt);
2de001ee 2335 enum vect_def_type dt;
894dd753 2336 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2337 {
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2340 "use not simple.\n");
2341 return false;
2342 }
2c53b149 2343 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2de001ee
RS
2344 }
2345 }
2346
2347 if (overrun_p)
2348 {
2349 gcc_assert (can_overrun_p);
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "Data access with gaps requires scalar "
2353 "epilogue loop\n");
2354 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2355 }
2356
2357 return true;
2358}
2359
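/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  A minimal
   model of the "harmless overrun" test used twice in
   get_group_load_store_type: reading past the last group element is fine
   when the trailing gap is smaller than one alignment boundary's worth of
   elements, because every vector access then stays inside a B-byte block
   that contains at least one real element.  Names and values are
   hypothetical.  */
#include <stdio.h>

static int
overrun_is_harmless_p (unsigned int gap, unsigned int alignment_bytes,
		       unsigned int scalar_size_bytes)
{
  return gap < alignment_bytes / scalar_size_bytes;
}

int
main (void)
{
  /* 16-byte-aligned group of 4-byte elements, gap of 2: safe.  */
  printf ("%d\n", overrun_is_harmless_p (2, 16, 4));
  /* Same layout with a gap of 5: the final block could be all gap.  */
  printf ("%d\n", overrun_is_harmless_p (5, 16, 4));
  return 0;
}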
62da9e14
RS
2360/* A subroutine of get_load_store_type, with a subset of the same
2361 arguments. Handle the case where STMT is a load or store that
2362 accesses consecutive elements with a negative step. */
2363
2364static vect_memory_access_type
2365get_negative_load_store_type (gimple *stmt, tree vectype,
2366 vec_load_store_type vls_type,
2367 unsigned int ncopies)
2368{
2369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2370 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2371 dr_alignment_support alignment_support_scheme;
2372
2373 if (ncopies > 1)
2374 {
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "multiple types with negative step.\n");
2378 return VMAT_ELEMENTWISE;
2379 }
2380
2381 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2382 if (alignment_support_scheme != dr_aligned
2383 && alignment_support_scheme != dr_unaligned_supported)
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "negative step but alignment required.\n");
2388 return VMAT_ELEMENTWISE;
2389 }
2390
2391 if (vls_type == VLS_STORE_INVARIANT)
2392 {
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_NOTE, vect_location,
2395 "negative step with invariant source;"
2396 " no permute needed.\n");
2397 return VMAT_CONTIGUOUS_DOWN;
2398 }
2399
2400 if (!perm_mask_for_reverse (vectype))
2401 {
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "negative step and reversing not supported.\n");
2405 return VMAT_ELEMENTWISE;
2406 }
2407
2408 return VMAT_CONTIGUOUS_REVERSE;
2409}
2410
2de001ee
RS
2411/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2412 if there is a memory access type that the vectorized form can use,
2413 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2414 or scatters, fill in GS_INFO accordingly.
2415
2416 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2417 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2418 VECTYPE is the vector type that the vectorized statements will use.
2419 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2420
2421static bool
7e11fc7f 2422get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2423 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2424 vect_memory_access_type *memory_access_type,
2425 gather_scatter_info *gs_info)
2426{
2427 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2428 vec_info *vinfo = stmt_info->vinfo;
2429 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2430 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2431 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2432 {
2433 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2434 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2435 gcc_unreachable ();
894dd753 2436 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2437 &gs_info->offset_dt,
2438 &gs_info->offset_vectype))
2439 {
2440 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "%s index use not simple.\n",
2443 vls_type == VLS_LOAD ? "gather" : "scatter");
2444 return false;
2445 }
2446 }
2447 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2448 {
7e11fc7f 2449 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2450 memory_access_type, gs_info))
2de001ee
RS
2451 return false;
2452 }
2453 else if (STMT_VINFO_STRIDED_P (stmt_info))
2454 {
2455 gcc_assert (!slp);
ab2fc782 2456 if (loop_vinfo
429ef523
RS
2457 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2458 masked_p, gs_info))
ab2fc782
RS
2459 *memory_access_type = VMAT_GATHER_SCATTER;
2460 else
2461 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2462 }
2463 else
62da9e14
RS
2464 {
2465 int cmp = compare_step_with_zero (stmt);
2466 if (cmp < 0)
2467 *memory_access_type = get_negative_load_store_type
2468 (stmt, vectype, vls_type, ncopies);
2469 else if (cmp == 0)
2470 {
2471 gcc_assert (vls_type == VLS_LOAD);
2472 *memory_access_type = VMAT_INVARIANT;
2473 }
2474 else
2475 *memory_access_type = VMAT_CONTIGUOUS;
2476 }
2de001ee 2477
4d694b27
RS
2478 if ((*memory_access_type == VMAT_ELEMENTWISE
2479 || *memory_access_type == VMAT_STRIDED_SLP)
2480 && !nunits.is_constant ())
2481 {
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "Not using elementwise accesses due to variable "
2485 "vectorization factor.\n");
2486 return false;
2487 }
2488
2de001ee
RS
2489 /* FIXME: At the moment the cost model seems to underestimate the
2490 cost of using elementwise accesses. This check preserves the
2491 traditional behavior until that can be fixed. */
2492 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2493 && !STMT_VINFO_STRIDED_P (stmt_info)
2c53b149
RB
2494 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2495 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2496 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2497 {
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "not falling back to elementwise accesses\n");
2501 return false;
2502 }
2503 return true;
2504}
2505
aaeefd88 2506/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2507 conditional load or store STMT. When returning true, store the type
2508 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2509 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2510
2511static bool
929b4411
RS
2512vect_check_load_store_mask (gimple *stmt, tree mask,
2513 vect_def_type *mask_dt_out,
2514 tree *mask_vectype_out)
aaeefd88
RS
2515{
2516 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2517 {
2518 if (dump_enabled_p ())
2519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2520 "mask argument is not a boolean.\n");
2521 return false;
2522 }
2523
2524 if (TREE_CODE (mask) != SSA_NAME)
2525 {
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask argument is not an SSA name.\n");
2529 return false;
2530 }
2531
2532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2533 enum vect_def_type mask_dt;
aaeefd88 2534 tree mask_vectype;
894dd753 2535 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2536 {
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2539 "mask use not simple.\n");
2540 return false;
2541 }
2542
2543 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2544 if (!mask_vectype)
2545 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2546
2547 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2548 {
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "could not find an appropriate vector mask type.\n");
2552 return false;
2553 }
2554
2555 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2556 TYPE_VECTOR_SUBPARTS (vectype)))
2557 {
2558 if (dump_enabled_p ())
2559 {
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "vector mask type ");
2562 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2563 dump_printf (MSG_MISSED_OPTIMIZATION,
2564 " does not match vector data type ");
2565 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2566 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2567 }
2568 return false;
2569 }
2570
929b4411 2571 *mask_dt_out = mask_dt;
aaeefd88
RS
2572 *mask_vectype_out = mask_vectype;
2573 return true;
2574}
2575
3133c3b6
RS
2576/* Return true if stored value RHS is suitable for vectorizing store
2577 statement STMT. When returning true, store the type of the
929b4411
RS
2578 definition in *RHS_DT_OUT, the type of the vectorized store value in
2579 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2580
2581static bool
929b4411
RS
2582vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2583 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2584{
2585 /* In the case this is a store from a constant make sure
2586 native_encode_expr can handle it. */
2587 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2588 {
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "cannot encode constant as a byte sequence.\n");
2592 return false;
2593 }
2594
2595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2596 enum vect_def_type rhs_dt;
3133c3b6 2597 tree rhs_vectype;
894dd753 2598 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
3133c3b6
RS
2599 {
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "use not simple.\n");
2603 return false;
2604 }
2605
2606 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2607 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2608 {
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "incompatible vector types.\n");
2612 return false;
2613 }
2614
929b4411 2615 *rhs_dt_out = rhs_dt;
3133c3b6 2616 *rhs_vectype_out = rhs_vectype;
929b4411 2617 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2618 *vls_type_out = VLS_STORE_INVARIANT;
2619 else
2620 *vls_type_out = VLS_STORE;
2621 return true;
2622}
2623
bc9587eb
RS
2624/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2625 Note that we support masks with floating-point type, in which case the
2626 floats are interpreted as a bitmask. */
2627
2628static tree
2629vect_build_all_ones_mask (gimple *stmt, tree masktype)
2630{
2631 if (TREE_CODE (masktype) == INTEGER_TYPE)
2632 return build_int_cst (masktype, -1);
2633 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2634 {
2635 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2636 mask = build_vector_from_val (masktype, mask);
2637 return vect_init_vector (stmt, mask, masktype, NULL);
2638 }
2639 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2640 {
2641 REAL_VALUE_TYPE r;
2642 long tmp[6];
2643 for (int j = 0; j < 6; ++j)
2644 tmp[j] = -1;
2645 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2646 tree mask = build_real (TREE_TYPE (masktype), r);
2647 mask = build_vector_from_val (masktype, mask);
2648 return vect_init_vector (stmt, mask, masktype, NULL);
2649 }
2650 gcc_unreachable ();
2651}
2652
2653/* Build an all-zero merge value of type VECTYPE while vectorizing
2654 STMT as a gather load. */
2655
2656static tree
2657vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2658{
2659 tree merge;
2660 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2661 merge = build_int_cst (TREE_TYPE (vectype), 0);
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2663 {
2664 REAL_VALUE_TYPE r;
2665 long tmp[6];
2666 for (int j = 0; j < 6; ++j)
2667 tmp[j] = 0;
2668 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2669 merge = build_real (TREE_TYPE (vectype), r);
2670 }
2671 else
2672 gcc_unreachable ();
2673 merge = build_vector_from_val (vectype, merge);
2674 return vect_init_vector (stmt, merge, vectype, NULL);
2675}
2676
c48d2d35
RS
2677/* Build a gather load call while vectorizing STMT. Insert new instructions
2678 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2679 operation. If the load is conditional, MASK is the unvectorized
929b4411 2680 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2681
2682static void
2683vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2684 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2685 tree mask, vect_def_type mask_dt)
c48d2d35
RS
2686{
2687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2688 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2689 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2691 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2692 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2693 edge pe = loop_preheader_edge (loop);
2694 enum { NARROW, NONE, WIDEN } modifier;
2695 poly_uint64 gather_off_nunits
2696 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2697
2698 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2699 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2700 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2703 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2704 tree scaletype = TREE_VALUE (arglist);
2705 gcc_checking_assert (types_compatible_p (srctype, rettype)
2706 && (!mask || types_compatible_p (srctype, masktype)));
2707
2708 tree perm_mask = NULL_TREE;
2709 tree mask_perm_mask = NULL_TREE;
2710 if (known_eq (nunits, gather_off_nunits))
2711 modifier = NONE;
2712 else if (known_eq (nunits * 2, gather_off_nunits))
2713 {
2714 modifier = WIDEN;
2715
2716 /* Currently widening gathers and scatters are only supported for
2717 fixed-length vectors. */
2718 int count = gather_off_nunits.to_constant ();
2719 vec_perm_builder sel (count, count, 1);
2720 for (int i = 0; i < count; ++i)
2721 sel.quick_push (i | (count / 2));
2722
2723 vec_perm_indices indices (sel, 1, count);
2724 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2725 indices);
2726 }
2727 else if (known_eq (nunits, gather_off_nunits * 2))
2728 {
2729 modifier = NARROW;
2730
2731 /* Currently narrowing gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count = nunits.to_constant ();
2734 vec_perm_builder sel (count, count, 1);
2735 sel.quick_grow (count);
2736 for (int i = 0; i < count; ++i)
2737 sel[i] = i < count / 2 ? i : i + count / 2;
2738 vec_perm_indices indices (sel, 2, count);
2739 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2740
2741 ncopies *= 2;
2742
2743 if (mask)
2744 {
2745 for (int i = 0; i < count; ++i)
2746 sel[i] = i | (count / 2);
2747 indices.new_vector (sel, 2, count);
2748 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2749 }
2750 }
2751 else
2752 gcc_unreachable ();
2753
2754 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2755 vectype);
2756
2757 tree ptr = fold_convert (ptrtype, gs_info->base);
2758 if (!is_gimple_min_invariant (ptr))
2759 {
2760 gimple_seq seq;
2761 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2762 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2763 gcc_assert (!new_bb);
2764 }
2765
2766 tree scale = build_int_cst (scaletype, gs_info->scale);
2767
2768 tree vec_oprnd0 = NULL_TREE;
2769 tree vec_mask = NULL_TREE;
2770 tree src_op = NULL_TREE;
2771 tree mask_op = NULL_TREE;
2772 tree prev_res = NULL_TREE;
2773 stmt_vec_info prev_stmt_info = NULL;
2774
2775 if (!mask)
2776 {
2777 src_op = vect_build_zero_merge_argument (stmt, rettype);
2778 mask_op = vect_build_all_ones_mask (stmt, masktype);
2779 }
2780
2781 for (int j = 0; j < ncopies; ++j)
2782 {
2783 tree op, var;
2784 gimple *new_stmt;
2785 if (modifier == WIDEN && (j & 1))
2786 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2787 perm_mask, stmt, gsi);
2788 else if (j == 0)
2789 op = vec_oprnd0
2790 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2791 else
2792 op = vec_oprnd0
2793 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2794
2795 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2796 {
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2798 TYPE_VECTOR_SUBPARTS (idxtype)));
2799 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2800 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2801 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2803 op = var;
2804 }
2805
2806 if (mask)
2807 {
2808 if (mask_perm_mask && (j & 1))
2809 mask_op = permute_vec_elements (mask_op, mask_op,
2810 mask_perm_mask, stmt, gsi);
2811 else
2812 {
2813 if (j == 0)
2814 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2815 else
929b4411 2816 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2817
2818 mask_op = vec_mask;
2819 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2820 {
2821 gcc_assert
2822 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2823 TYPE_VECTOR_SUBPARTS (masktype)));
2824 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2825 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2826 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2827 mask_op);
2828 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2829 mask_op = var;
2830 }
2831 }
2832 src_op = mask_op;
2833 }
2834
2835 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2836 mask_op, scale);
2837
2838 if (!useless_type_conversion_p (vectype, rettype))
2839 {
2840 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2841 TYPE_VECTOR_SUBPARTS (rettype)));
2842 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2843 gimple_call_set_lhs (new_stmt, op);
2844 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2845 var = make_ssa_name (vec_dest);
2846 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2847 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2848 }
2849 else
2850 {
2851 var = make_ssa_name (vec_dest, new_stmt);
2852 gimple_call_set_lhs (new_stmt, var);
2853 }
2854
2855 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2856
2857 if (modifier == NARROW)
2858 {
2859 if ((j & 1) == 0)
2860 {
2861 prev_res = var;
2862 continue;
2863 }
2864 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2865 new_stmt = SSA_NAME_DEF_STMT (var);
2866 }
2867
2868 if (prev_stmt_info == NULL)
2869 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2870 else
2871 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2872 prev_stmt_info = vinfo_for_stmt (new_stmt);
2873 }
2874}
2875
bfaa08b7
RS
2876/* Prepare the base and offset in GS_INFO for vectorization.
2877 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2878 to the vectorized offset argument for the first copy of STMT. STMT
2879 is the statement described by GS_INFO and LOOP is the containing loop. */
2880
2881static void
2882vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2883 gather_scatter_info *gs_info,
2884 tree *dataref_ptr, tree *vec_offset)
2885{
2886 gimple_seq stmts = NULL;
2887 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2888 if (stmts != NULL)
2889 {
2890 basic_block new_bb;
2891 edge pe = loop_preheader_edge (loop);
2892 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2893 gcc_assert (!new_bb);
2894 }
2895 tree offset_type = TREE_TYPE (gs_info->offset);
2896 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2897 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2898 offset_vectype);
2899}
2900
ab2fc782
RS
2901/* Prepare to implement a grouped or strided load or store using
2902 the gather load or scatter store operation described by GS_INFO.
2903 STMT is the load or store statement.
2904
2905 Set *DATAREF_BUMP to the amount that should be added to the base
2906 address after each copy of the vectorized statement. Set *VEC_OFFSET
2907 to an invariant offset vector in which element I has the value
2908 I * DR_STEP / SCALE. */
2909
2910static void
2911vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2912 gather_scatter_info *gs_info,
2913 tree *dataref_bump, tree *vec_offset)
2914{
2915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2917 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2918 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2919 gimple_seq stmts;
2920
2921 tree bump = size_binop (MULT_EXPR,
2922 fold_convert (sizetype, DR_STEP (dr)),
2923 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2924 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2925 if (stmts)
2926 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2927
2928 /* The offset given in GS_INFO can have pointer type, so use the element
2929 type of the vector instead. */
2930 tree offset_type = TREE_TYPE (gs_info->offset);
2931 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2932 offset_type = TREE_TYPE (offset_vectype);
2933
2934 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2935 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2936 ssize_int (gs_info->scale));
2937 step = fold_convert (offset_type, step);
2938 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2939
2940 /* Create {0, X, X*2, X*3, ...}. */
2941 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2942 build_zero_cst (offset_type), step);
2943 if (stmts)
2944 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2945}
2946
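/* Editor's illustrative sketch -- not part of tree-vect-stmts.c.  It shows,
   for a constant DR_STEP, the two values that vect_get_strided_load_store_ops
   computes: the pointer bump advances by one whole vector of strided
   elements (DR_STEP * nunits), and the invariant offset vector holds
   { 0, X, 2*X, ... } with X = DR_STEP / SCALE.  The concrete numbers are
   hypothetical.  */
#include <stdio.h>

int
main (void)
{
  long dr_step = 32;		/* bytes between consecutive scalars */
  long scale = 4;		/* gather/scatter scale factor */
  unsigned int nunits = 4;	/* elements per vector */

  long bump = dr_step * nunits;	/* *DATAREF_BUMP */
  long x = dr_step / scale;

  printf ("bump = %ld, offsets =", bump);
  for (unsigned int i = 0; i < nunits; ++i)
    printf (" %ld", (long) i * x);	/* *VEC_OFFSET: 0 8 16 24 */
  printf ("\n");
  return 0;
}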
2947/* Return the amount that should be added to a vector pointer to move
2948 to the next or previous copy of AGGR_TYPE. DR is the data reference
2949 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2950 vectorization. */
2951
2952static tree
2953vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2954 vect_memory_access_type memory_access_type)
2955{
2956 if (memory_access_type == VMAT_INVARIANT)
2957 return size_zero_node;
2958
2959 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2960 tree step = vect_dr_behavior (dr)->step;
2961 if (tree_int_cst_sgn (step) == -1)
2962 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2963 return iv_step;
2964}
2965
37b14185
RB
2966/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2967
2968static bool
2969vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2970 gimple **vec_stmt, slp_tree slp_node,
68435eb2
RB
2971 tree vectype_in, enum vect_def_type *dt,
2972 stmt_vector_for_cost *cost_vec)
37b14185
RB
2973{
2974 tree op, vectype;
2975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2977 unsigned ncopies;
2978 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2979
2980 op = gimple_call_arg (stmt, 0);
2981 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2982
2983 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2984 return false;
37b14185
RB
2985
2986 /* Multiple types in SLP are handled by creating the appropriate number of
2987 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2988 case of SLP. */
2989 if (slp_node)
2990 ncopies = 1;
2991 else
e8f142e2 2992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2993
2994 gcc_assert (ncopies >= 1);
2995
2996 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2997 if (! char_vectype)
2998 return false;
2999
928686b1
RS
3000 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3001 return false;
3002
794e3180 3003 unsigned word_bytes = num_bytes / nunits;
908a1a16 3004
d980067b
RS
3005 /* The encoding uses one stepped pattern for each byte in the word. */
3006 vec_perm_builder elts (num_bytes, word_bytes, 3);
3007 for (unsigned i = 0; i < 3; ++i)
37b14185 3008 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 3009 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 3010
e3342de4
RS
3011 vec_perm_indices indices (elts, 1, num_bytes);
3012 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
3013 return false;
3014
3015 if (! vec_stmt)
3016 {
3017 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3018 DUMP_VECT_SCOPE ("vectorizable_bswap");
78604de0 3019 if (! slp_node)
37b14185 3020 {
68435eb2
RB
3021 record_stmt_cost (cost_vec,
3022 1, vector_stmt, stmt_info, 0, vect_prologue);
3023 record_stmt_cost (cost_vec,
3024 ncopies, vec_perm, stmt_info, 0, vect_body);
37b14185
RB
3025 }
3026 return true;
3027 }
3028
736d0f28 3029 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
3030
3031 /* Transform. */
3032 vec<tree> vec_oprnds = vNULL;
3033 gimple *new_stmt = NULL;
3034 stmt_vec_info prev_stmt_info = NULL;
3035 for (unsigned j = 0; j < ncopies; j++)
3036 {
3037 /* Handle uses. */
3038 if (j == 0)
306b0c92 3039 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
3040 else
3041 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3042
 3043        /* Arguments are ready.  Create the new vector stmt.  */
3044 unsigned i;
3045 tree vop;
3046 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3047 {
3048 tree tem = make_ssa_name (char_vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 char_vectype, vop));
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3052 tree tem2 = make_ssa_name (char_vectype);
3053 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3054 tem, tem, bswap_vconst);
3055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3056 tem = make_ssa_name (vectype);
3057 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3058 vectype, tem2));
3059 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3060 if (slp_node)
3061 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3062 }
3063
3064 if (slp_node)
3065 continue;
3066
3067 if (j == 0)
3068 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3069 else
3070 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3071
3072 prev_stmt_info = vinfo_for_stmt (new_stmt);
3073 }
3074
3075 vec_oprnds.release ();
3076 return true;
3077}
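
The permutation built in vectorizable_bswap can be checked with a standalone sketch: expanding the stepped pattern, destination byte j of word i is taken from source byte (i + 1) * word_bytes - j - 1, which reverses the bytes inside every word and is therefore a per-element bswap. The vector width and element size below are assumptions for the example, not values taken from the code above.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NUM_BYTES 16   /* bytes in the whole vector (assumed) */
#define WORD_BYTES 4   /* bytes per element, i.e. a bswap32 on V4SI (assumed) */
#define NUNITS (NUM_BYTES / WORD_BYTES)

int
main (void)
{
  uint32_t in[NUNITS] = { 0x11223344, 0x55667788, 0x99aabbcc, 0xddeeff00 };
  uint8_t src[NUM_BYTES], dst[NUM_BYTES];
  unsigned sel[NUM_BYTES];

  memcpy (src, in, sizeof src);

  /* Same index pattern as the vec_perm_builder loop above: byte j of
     word i comes from byte (i + 1) * WORD_BYTES - j - 1 of the source.  */
  for (unsigned i = 0; i < NUNITS; ++i)
    for (unsigned j = 0; j < WORD_BYTES; ++j)
      sel[i * WORD_BYTES + j] = (i + 1) * WORD_BYTES - j - 1;

  for (unsigned k = 0; k < NUM_BYTES; ++k)
    dst[k] = src[sel[k]];

  uint32_t out[NUNITS];
  memcpy (out, dst, sizeof out);
  for (unsigned i = 0; i < NUNITS; ++i)
    printf ("%08x -> %08x\n", in[i], out[i]);  /* e.g. 11223344 -> 44332211 */
  return 0;
}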
3078
b1b6836e
RS
3079/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3080 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3081 in a single step. On success, store the binary pack code in
3082 *CONVERT_CODE. */
3083
3084static bool
3085simple_integer_narrowing (tree vectype_out, tree vectype_in,
3086 tree_code *convert_code)
3087{
3088 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3089 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3090 return false;
3091
3092 tree_code code;
3093 int multi_step_cvt = 0;
3094 auto_vec <tree, 8> interm_types;
3095 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3096 &code, &multi_step_cvt,
3097 &interm_types)
3098 || multi_step_cvt)
3099 return false;
3100
3101 *convert_code = code;
3102 return true;
3103}
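
"Narrowing in a single step" as tested by simple_integer_narrowing means that two vectors of the wide type can be packed, with truncation, into one vector of the narrow type with no intermediate type in between. A scalar sketch of that pack-and-truncate semantics; the element count is assumed for the example.

#include <stdint.h>
#include <stdio.h>

#define N 4   /* elements per wide vector (assumed) */

/* Pack two int32 "vectors" into one int16 "vector" by truncation,
   the kind of operation supportable_narrowing_operation checks for.  */
static void
pack_trunc (const int32_t *lo, const int32_t *hi, int16_t *out)
{
  for (int i = 0; i < N; i++)
    out[i] = (int16_t) lo[i];
  for (int i = 0; i < N; i++)
    out[N + i] = (int16_t) hi[i];
}

int
main (void)
{
  int32_t a[N] = { 1, 2, 70000, -3 };   /* 70000 demonstrates the truncation */
  int32_t b[N] = { 4, 5, 6, 7 };
  int16_t r[2 * N];

  pack_trunc (a, b, r);
  for (int i = 0; i < 2 * N; i++)
    printf ("%d ", r[i]);
  printf ("\n");
  return 0;
}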
5ce9450f 3104
ebfd146a
IR
3105/* Function vectorizable_call.
3106
538dd0b7 3107 Check if GS performs a function call that can be vectorized.
b8698a0f 3108 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3109 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3110 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3111
3112static bool
355fe088 3113vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 3114 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 3115{
538dd0b7 3116 gcall *stmt;
ebfd146a
IR
3117 tree vec_dest;
3118 tree scalar_dest;
0267732b 3119 tree op;
ebfd146a 3120 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3121 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3122 tree vectype_out, vectype_in;
c7bda0f4
RS
3123 poly_uint64 nunits_in;
3124 poly_uint64 nunits_out;
ebfd146a 3125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3126 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3127 vec_info *vinfo = stmt_info->vinfo;
81c40241 3128 tree fndecl, new_temp, rhs_type;
0502fb85
UB
3129 enum vect_def_type dt[3]
3130 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 3131 int ndts = 3;
355fe088 3132 gimple *new_stmt = NULL;
ebfd146a 3133 int ncopies, j;
6e1aa848 3134 vec<tree> vargs = vNULL;
ebfd146a
IR
3135 enum { NARROW, NONE, WIDEN } modifier;
3136 size_t i, nargs;
9d5e7640 3137 tree lhs;
ebfd146a 3138
190c2236 3139 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3140 return false;
3141
66c16fd9
RB
3142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3143 && ! vec_stmt)
ebfd146a
IR
3144 return false;
3145
538dd0b7
DM
3146 /* Is GS a vectorizable call? */
3147 stmt = dyn_cast <gcall *> (gs);
3148 if (!stmt)
ebfd146a
IR
3149 return false;
3150
5ce9450f 3151 if (gimple_call_internal_p (stmt)
bfaa08b7 3152 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3153 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3154 /* Handled by vectorizable_load and vectorizable_store. */
3155 return false;
5ce9450f 3156
0136f8f0
AH
3157 if (gimple_call_lhs (stmt) == NULL_TREE
3158 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3159 return false;
3160
0136f8f0 3161 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3162
b690cc0f
RG
3163 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3164
ebfd146a
IR
3165 /* Process function arguments. */
3166 rhs_type = NULL_TREE;
b690cc0f 3167 vectype_in = NULL_TREE;
ebfd146a
IR
3168 nargs = gimple_call_num_args (stmt);
3169
1b1562a5
MM
3170 /* Bail out if the function has more than three arguments; we do not have
3171 interesting builtin functions to vectorize with more than two arguments
3172 except for fma. Zero arguments is not supported either. */
3173 if (nargs == 0 || nargs > 3)
ebfd146a
IR
3174 return false;
3175
74bf76ed
JJ
3176 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3177 if (gimple_call_internal_p (stmt)
3178 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3179 {
3180 nargs = 0;
3181 rhs_type = unsigned_type_node;
3182 }
3183
ebfd146a
IR
3184 for (i = 0; i < nargs; i++)
3185 {
b690cc0f
RG
3186 tree opvectype;
3187
ebfd146a
IR
3188 op = gimple_call_arg (stmt, i);
3189
3190 /* We can only handle calls with arguments of the same type. */
3191 if (rhs_type
8533c9d8 3192 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3193 {
73fbfcad 3194 if (dump_enabled_p ())
78c60e3d 3195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3196 "argument types differ.\n");
ebfd146a
IR
3197 return false;
3198 }
b690cc0f
RG
3199 if (!rhs_type)
3200 rhs_type = TREE_TYPE (op);
ebfd146a 3201
894dd753 3202 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
ebfd146a 3203 {
73fbfcad 3204 if (dump_enabled_p ())
78c60e3d 3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3206 "use not simple.\n");
ebfd146a
IR
3207 return false;
3208 }
ebfd146a 3209
b690cc0f
RG
3210 if (!vectype_in)
3211 vectype_in = opvectype;
3212 else if (opvectype
3213 && opvectype != vectype_in)
3214 {
73fbfcad 3215 if (dump_enabled_p ())
78c60e3d 3216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3217 "argument vector types differ.\n");
b690cc0f
RG
3218 return false;
3219 }
3220 }
3221 /* If all arguments are external or constant defs use a vector type with
3222 the same size as the output vector type. */
ebfd146a 3223 if (!vectype_in)
b690cc0f 3224 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3225 if (vec_stmt)
3226 gcc_assert (vectype_in);
3227 if (!vectype_in)
3228 {
73fbfcad 3229 if (dump_enabled_p ())
7d8930a0 3230 {
78c60e3d
SS
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "no vectype for scalar type ");
3233 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3234 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3235 }
3236
3237 return false;
3238 }
ebfd146a
IR
3239
3240 /* FORNOW */
b690cc0f
RG
3241 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3242 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3243 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3244 modifier = NARROW;
c7bda0f4 3245 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3246 modifier = NONE;
c7bda0f4 3247 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3248 modifier = WIDEN;
3249 else
3250 return false;
3251
70439f0d
RS
3252 /* We only handle functions that do not read or clobber memory. */
3253 if (gimple_vuse (stmt))
3254 {
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3257 "function reads from or writes to memory.\n");
3258 return false;
3259 }
3260
ebfd146a
IR
3261 /* For now, we only vectorize functions if a target specific builtin
3262 is available. TODO -- in some cases, it might be profitable to
3263 insert the calls for pieces of the vector, in order to be able
3264 to vectorize other operations in the loop. */
70439f0d
RS
3265 fndecl = NULL_TREE;
3266 internal_fn ifn = IFN_LAST;
3267 combined_fn cfn = gimple_call_combined_fn (stmt);
3268 tree callee = gimple_call_fndecl (stmt);
3269
3270 /* First try using an internal function. */
b1b6836e
RS
3271 tree_code convert_code = ERROR_MARK;
3272 if (cfn != CFN_LAST
3273 && (modifier == NONE
3274 || (modifier == NARROW
3275 && simple_integer_narrowing (vectype_out, vectype_in,
3276 &convert_code))))
70439f0d
RS
3277 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3278 vectype_in);
3279
3280 /* If that fails, try asking for a target-specific built-in function. */
3281 if (ifn == IFN_LAST)
3282 {
3283 if (cfn != CFN_LAST)
3284 fndecl = targetm.vectorize.builtin_vectorized_function
3285 (cfn, vectype_out, vectype_in);
7672aa9b 3286 else if (callee)
70439f0d
RS
3287 fndecl = targetm.vectorize.builtin_md_vectorized_function
3288 (callee, vectype_out, vectype_in);
3289 }
3290
3291 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3292 {
70439f0d 3293 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3294 && !slp_node
3295 && loop_vinfo
3296 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3297 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3298 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3299 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3300 {
3301 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3302 { 0, 1, 2, ... vf - 1 } vector. */
3303 gcc_assert (nargs == 0);
3304 }
37b14185
RB
3305 else if (modifier == NONE
3306 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3307 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3308 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3309 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3310 vectype_in, dt, cost_vec);
74bf76ed
JJ
3311 else
3312 {
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3315 "function is not vectorizable.\n");
74bf76ed
JJ
3316 return false;
3317 }
ebfd146a
IR
3318 }
3319
fce57248 3320 if (slp_node)
190c2236 3321 ncopies = 1;
b1b6836e 3322 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3323 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3324 else
e8f142e2 3325 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3326
3327 /* Sanity check: make sure that at least one copy of the vectorized stmt
3328 needs to be generated. */
3329 gcc_assert (ncopies >= 1);
3330
3331 if (!vec_stmt) /* transformation not required. */
3332 {
3333 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3334 DUMP_VECT_SCOPE ("vectorizable_call");
68435eb2
RB
3335 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3336 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3337 record_stmt_cost (cost_vec, ncopies / 2,
3338 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3339
ebfd146a
IR
3340 return true;
3341 }
3342
67b8dbac 3343 /* Transform. */
ebfd146a 3344
73fbfcad 3345 if (dump_enabled_p ())
e645e942 3346 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3347
3348 /* Handle def. */
3349 scalar_dest = gimple_call_lhs (stmt);
3350 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3351
3352 prev_stmt_info = NULL;
b1b6836e 3353 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3354 {
b1b6836e 3355 tree prev_res = NULL_TREE;
ebfd146a
IR
3356 for (j = 0; j < ncopies; ++j)
3357 {
3358 /* Build argument list for the vectorized call. */
3359 if (j == 0)
9771b263 3360 vargs.create (nargs);
ebfd146a 3361 else
9771b263 3362 vargs.truncate (0);
ebfd146a 3363
190c2236
JJ
3364 if (slp_node)
3365 {
ef062b13 3366 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3367 vec<tree> vec_oprnds0;
190c2236
JJ
3368
3369 for (i = 0; i < nargs; i++)
9771b263 3370 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3371 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3372 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3373
3374 /* Arguments are ready. Create the new vector stmt. */
9771b263 3375 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3376 {
3377 size_t k;
3378 for (k = 0; k < nargs; k++)
3379 {
37b5ec8f 3380 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3381 vargs[k] = vec_oprndsk[i];
190c2236 3382 }
b1b6836e
RS
3383 if (modifier == NARROW)
3384 {
3385 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3386 gcall *call
3387 = gimple_build_call_internal_vec (ifn, vargs);
3388 gimple_call_set_lhs (call, half_res);
3389 gimple_call_set_nothrow (call, true);
3390 new_stmt = call;
b1b6836e
RS
3391 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3392 if ((i & 1) == 0)
3393 {
3394 prev_res = half_res;
3395 continue;
3396 }
3397 new_temp = make_ssa_name (vec_dest);
3398 new_stmt = gimple_build_assign (new_temp, convert_code,
3399 prev_res, half_res);
3400 }
70439f0d 3401 else
b1b6836e 3402 {
a844293d 3403 gcall *call;
b1b6836e 3404 if (ifn != IFN_LAST)
a844293d 3405 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3406 else
a844293d
RS
3407 call = gimple_build_call_vec (fndecl, vargs);
3408 new_temp = make_ssa_name (vec_dest, call);
3409 gimple_call_set_lhs (call, new_temp);
3410 gimple_call_set_nothrow (call, true);
3411 new_stmt = call;
b1b6836e 3412 }
190c2236 3413 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3414 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3415 }
3416
3417 for (i = 0; i < nargs; i++)
3418 {
37b5ec8f 3419 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3420 vec_oprndsi.release ();
190c2236 3421 }
190c2236
JJ
3422 continue;
3423 }
3424
ebfd146a
IR
3425 for (i = 0; i < nargs; i++)
3426 {
3427 op = gimple_call_arg (stmt, i);
3428 if (j == 0)
3429 vec_oprnd0
81c40241 3430 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3431 else
63827fb8
IR
3432 {
3433 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3434 vec_oprnd0
3435 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3436 }
ebfd146a 3437
9771b263 3438 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3439 }
3440
74bf76ed
JJ
3441 if (gimple_call_internal_p (stmt)
3442 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3443 {
c7bda0f4 3444 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3445 tree new_var
0e22bb5a 3446 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3447 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3448 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3449 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3450 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3451 }
b1b6836e
RS
3452 else if (modifier == NARROW)
3453 {
3454 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3455 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3456 gimple_call_set_lhs (call, half_res);
3457 gimple_call_set_nothrow (call, true);
3458 new_stmt = call;
b1b6836e
RS
3459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3460 if ((j & 1) == 0)
3461 {
3462 prev_res = half_res;
3463 continue;
3464 }
3465 new_temp = make_ssa_name (vec_dest);
3466 new_stmt = gimple_build_assign (new_temp, convert_code,
3467 prev_res, half_res);
3468 }
74bf76ed
JJ
3469 else
3470 {
a844293d 3471 gcall *call;
70439f0d 3472 if (ifn != IFN_LAST)
a844293d 3473 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3474 else
a844293d 3475 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3476 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3477 gimple_call_set_lhs (call, new_temp);
3478 gimple_call_set_nothrow (call, true);
3479 new_stmt = call;
74bf76ed 3480 }
ebfd146a
IR
3481 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3482
b1b6836e 3483 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3484 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3485 else
3486 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3487
3488 prev_stmt_info = vinfo_for_stmt (new_stmt);
3489 }
b1b6836e
RS
3490 }
3491 else if (modifier == NARROW)
3492 {
ebfd146a
IR
3493 for (j = 0; j < ncopies; ++j)
3494 {
3495 /* Build argument list for the vectorized call. */
3496 if (j == 0)
9771b263 3497 vargs.create (nargs * 2);
ebfd146a 3498 else
9771b263 3499 vargs.truncate (0);
ebfd146a 3500
190c2236
JJ
3501 if (slp_node)
3502 {
ef062b13 3503 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3504 vec<tree> vec_oprnds0;
190c2236
JJ
3505
3506 for (i = 0; i < nargs; i++)
9771b263 3507 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3508 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3509 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3510
3511 /* Arguments are ready. Create the new vector stmt. */
9771b263 3512 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3513 {
3514 size_t k;
9771b263 3515 vargs.truncate (0);
190c2236
JJ
3516 for (k = 0; k < nargs; k++)
3517 {
37b5ec8f 3518 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3519 vargs.quick_push (vec_oprndsk[i]);
3520 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3521 }
a844293d 3522 gcall *call;
70439f0d 3523 if (ifn != IFN_LAST)
a844293d 3524 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3525 else
a844293d
RS
3526 call = gimple_build_call_vec (fndecl, vargs);
3527 new_temp = make_ssa_name (vec_dest, call);
3528 gimple_call_set_lhs (call, new_temp);
3529 gimple_call_set_nothrow (call, true);
3530 new_stmt = call;
190c2236 3531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3533 }
3534
3535 for (i = 0; i < nargs; i++)
3536 {
37b5ec8f 3537 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3538 vec_oprndsi.release ();
190c2236 3539 }
190c2236
JJ
3540 continue;
3541 }
3542
ebfd146a
IR
3543 for (i = 0; i < nargs; i++)
3544 {
3545 op = gimple_call_arg (stmt, i);
3546 if (j == 0)
3547 {
3548 vec_oprnd0
81c40241 3549 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3550 vec_oprnd1
63827fb8 3551 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3552 }
3553 else
3554 {
336ecb65 3555 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3556 vec_oprnd0
63827fb8 3557 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3558 vec_oprnd1
63827fb8 3559 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3560 }
3561
9771b263
DN
3562 vargs.quick_push (vec_oprnd0);
3563 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3564 }
3565
b1b6836e 3566 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3567 new_temp = make_ssa_name (vec_dest, new_stmt);
3568 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3570
3571 if (j == 0)
3572 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3573 else
3574 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3575
3576 prev_stmt_info = vinfo_for_stmt (new_stmt);
3577 }
3578
3579 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3580 }
b1b6836e
RS
3581 else
3582 /* No current target implements this case. */
3583 return false;
ebfd146a 3584
9771b263 3585 vargs.release ();
ebfd146a 3586
ebfd146a
IR
3587 /* The call in STMT might prevent it from being removed in dce.
3588 We however cannot remove it here, due to the way the ssa name
3589 it defines is mapped to the new definition. So just replace
3590 rhs of the statement with something harmless. */
3591
dd34c087
JJ
3592 if (slp_node)
3593 return true;
3594
9d5e7640 3595 if (is_pattern_stmt_p (stmt_info))
ed7b8123
RS
3596 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3597 lhs = gimple_get_lhs (stmt_info->stmt);
3cc2fa2a 3598
0267732b 3599 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
ebfd146a 3600 set_vinfo_for_stmt (new_stmt, stmt_info);
ed7b8123 3601 set_vinfo_for_stmt (stmt_info->stmt, NULL);
ebfd146a
IR
3602 STMT_VINFO_STMT (stmt_info) = new_stmt;
3603 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3604
3605 return true;
3606}
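
One concrete case handled in the transform above: IFN_GOMP_SIMD_LANE takes no vector operands, and copy j of it is replaced by the constant index vector { j * nunits_out, j * nunits_out + 1, ... } that build_index_vector produces, so every scalar iteration sees its own lane number. A standalone sketch, with the vector length and copy count assumed:

#include <stdio.h>

#define NUNITS 4    /* elements per vector copy (assumed) */
#define NCOPIES 2   /* vectorized copies of the statement (assumed) */

int
main (void)
{
  /* Copy j of the vectorized GOMP_SIMD_LANE call is just the constant
     vector { j * NUNITS, j * NUNITS + 1, ... }: each scalar iteration
     reads its own lane number out of it.  */
  for (int j = 0; j < NCOPIES; j++)
    {
      printf ("copy %d: {", j);
      for (int i = 0; i < NUNITS; i++)
        printf (" %d", j * NUNITS + i);
      printf (" }\n");
    }
  return 0;
}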
3607
3608
0136f8f0
AH
3609struct simd_call_arg_info
3610{
3611 tree vectype;
3612 tree op;
0136f8f0 3613 HOST_WIDE_INT linear_step;
34e82342 3614 enum vect_def_type dt;
0136f8f0 3615 unsigned int align;
17b658af 3616 bool simd_lane_linear;
0136f8f0
AH
3617};
3618
17b658af
JJ
3619/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3620 is linear within simd lane (but not within whole loop), note it in
3621 *ARGINFO. */
3622
3623static void
3624vect_simd_lane_linear (tree op, struct loop *loop,
3625 struct simd_call_arg_info *arginfo)
3626{
355fe088 3627 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3628
3629 if (!is_gimple_assign (def_stmt)
3630 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3631 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3632 return;
3633
3634 tree base = gimple_assign_rhs1 (def_stmt);
3635 HOST_WIDE_INT linear_step = 0;
3636 tree v = gimple_assign_rhs2 (def_stmt);
3637 while (TREE_CODE (v) == SSA_NAME)
3638 {
3639 tree t;
3640 def_stmt = SSA_NAME_DEF_STMT (v);
3641 if (is_gimple_assign (def_stmt))
3642 switch (gimple_assign_rhs_code (def_stmt))
3643 {
3644 case PLUS_EXPR:
3645 t = gimple_assign_rhs2 (def_stmt);
3646 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3647 return;
3648 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3649 v = gimple_assign_rhs1 (def_stmt);
3650 continue;
3651 case MULT_EXPR:
3652 t = gimple_assign_rhs2 (def_stmt);
3653 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3654 return;
3655 linear_step = tree_to_shwi (t);
3656 v = gimple_assign_rhs1 (def_stmt);
3657 continue;
3658 CASE_CONVERT:
3659 t = gimple_assign_rhs1 (def_stmt);
3660 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3661 || (TYPE_PRECISION (TREE_TYPE (v))
3662 < TYPE_PRECISION (TREE_TYPE (t))))
3663 return;
3664 if (!linear_step)
3665 linear_step = 1;
3666 v = t;
3667 continue;
3668 default:
3669 return;
3670 }
8e4284d0 3671 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3672 && loop->simduid
3673 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3674 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3675 == loop->simduid))
3676 {
3677 if (!linear_step)
3678 linear_step = 1;
3679 arginfo->linear_step = linear_step;
3680 arginfo->op = base;
3681 arginfo->simd_lane_linear = true;
3682 return;
3683 }
3684 }
3685}
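
vect_simd_lane_linear walks the pointer arithmetic feeding OP and recognizes values of the form base + GOMP_SIMD_LANE * step, i.e. addresses that are linear in the SIMD lane even though they are not linear across the whole loop; per-lane slots of a privatized buffer are the typical case. A standalone sketch of the address shape it looks for, with the buffer layout assumed for the example:

#include <stdio.h>
#include <stddef.h>

#define VL 4   /* SIMD width (assumed) */

int
main (void)
{
  char buf[VL][8];              /* one private 8-byte slot per lane */
  char *base = (char *) buf;

  for (int lane = 0; lane < VL; lane++)
    {
      /* base + lane * step: constant step, linear in the lane only.  */
      char *p = base + (size_t) lane * sizeof buf[0];
      printf ("lane %d -> offset %td\n", lane, (ptrdiff_t) (p - base));
    }
  return 0;
}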
3686
cf1b2ba4
RS
3687/* Return the number of elements in vector type VECTYPE, which is associated
3688 with a SIMD clone. At present these vectors always have a constant
3689 length. */
3690
3691static unsigned HOST_WIDE_INT
3692simd_clone_subparts (tree vectype)
3693{
928686b1 3694 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3695}
3696
0136f8f0
AH
3697/* Function vectorizable_simd_clone_call.
3698
3699 Check if STMT performs a function call that can be vectorized
3700 by calling a simd clone of the function.
3701 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3702 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3703 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3704
3705static bool
355fe088 3706vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
3707 gimple **vec_stmt, slp_tree slp_node,
3708 stmt_vector_for_cost *)
0136f8f0
AH
3709{
3710 tree vec_dest;
3711 tree scalar_dest;
3712 tree op, type;
3713 tree vec_oprnd0 = NULL_TREE;
3714 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3715 tree vectype;
3716 unsigned int nunits;
3717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3718 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3719 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3720 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3721 tree fndecl, new_temp;
355fe088 3722 gimple *new_stmt = NULL;
0136f8f0 3723 int ncopies, j;
00426f9a 3724 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3725 vec<tree> vargs = vNULL;
3726 size_t i, nargs;
3727 tree lhs, rtype, ratype;
e7a74006 3728 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3729
3730 /* Is STMT a vectorizable call? */
3731 if (!is_gimple_call (stmt))
3732 return false;
3733
3734 fndecl = gimple_call_fndecl (stmt);
3735 if (fndecl == NULL_TREE)
3736 return false;
3737
d52f5295 3738 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3739 if (node == NULL || node->simd_clones == NULL)
3740 return false;
3741
3742 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3743 return false;
3744
66c16fd9
RB
3745 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3746 && ! vec_stmt)
0136f8f0
AH
3747 return false;
3748
3749 if (gimple_call_lhs (stmt)
3750 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3751 return false;
3752
3753 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3754
3755 vectype = STMT_VINFO_VECTYPE (stmt_info);
3756
3757 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3758 return false;
3759
3760 /* FORNOW */
fce57248 3761 if (slp_node)
0136f8f0
AH
3762 return false;
3763
3764 /* Process function arguments. */
3765 nargs = gimple_call_num_args (stmt);
3766
3767 /* Bail out if the function has zero arguments. */
3768 if (nargs == 0)
3769 return false;
3770
00426f9a 3771 arginfo.reserve (nargs, true);
0136f8f0
AH
3772
3773 for (i = 0; i < nargs; i++)
3774 {
3775 simd_call_arg_info thisarginfo;
3776 affine_iv iv;
3777
3778 thisarginfo.linear_step = 0;
3779 thisarginfo.align = 0;
3780 thisarginfo.op = NULL_TREE;
17b658af 3781 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3782
3783 op = gimple_call_arg (stmt, i);
894dd753 3784 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
81c40241 3785 &thisarginfo.vectype)
0136f8f0
AH
3786 || thisarginfo.dt == vect_uninitialized_def)
3787 {
3788 if (dump_enabled_p ())
3789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3790 "use not simple.\n");
0136f8f0
AH
3791 return false;
3792 }
3793
3794 if (thisarginfo.dt == vect_constant_def
3795 || thisarginfo.dt == vect_external_def)
3796 gcc_assert (thisarginfo.vectype == NULL_TREE);
3797 else
3798 gcc_assert (thisarginfo.vectype != NULL_TREE);
3799
6c9e85fb
JJ
3800 /* For linear arguments, the analyze phase should have saved
3801 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3802 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3803 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3804 {
3805 gcc_assert (vec_stmt);
3806 thisarginfo.linear_step
17b658af 3807 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3808 thisarginfo.op
17b658af
JJ
3809 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3810 thisarginfo.simd_lane_linear
3811 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3812 == boolean_true_node);
6c9e85fb
JJ
3813 /* If loop has been peeled for alignment, we need to adjust it. */
3814 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3815 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3816 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3817 {
3818 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3819 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3820 tree opt = TREE_TYPE (thisarginfo.op);
3821 bias = fold_convert (TREE_TYPE (step), bias);
3822 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3823 thisarginfo.op
3824 = fold_build2 (POINTER_TYPE_P (opt)
3825 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3826 thisarginfo.op, bias);
3827 }
3828 }
3829 else if (!vec_stmt
3830 && thisarginfo.dt != vect_constant_def
3831 && thisarginfo.dt != vect_external_def
3832 && loop_vinfo
3833 && TREE_CODE (op) == SSA_NAME
3834 && simple_iv (loop, loop_containing_stmt (stmt), op,
3835 &iv, false)
3836 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3837 {
3838 thisarginfo.linear_step = tree_to_shwi (iv.step);
3839 thisarginfo.op = iv.base;
3840 }
3841 else if ((thisarginfo.dt == vect_constant_def
3842 || thisarginfo.dt == vect_external_def)
3843 && POINTER_TYPE_P (TREE_TYPE (op)))
3844 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3845 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3846 linear too. */
3847 if (POINTER_TYPE_P (TREE_TYPE (op))
3848 && !thisarginfo.linear_step
3849 && !vec_stmt
3850 && thisarginfo.dt != vect_constant_def
3851 && thisarginfo.dt != vect_external_def
3852 && loop_vinfo
3853 && !slp_node
3854 && TREE_CODE (op) == SSA_NAME)
3855 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3856
3857 arginfo.quick_push (thisarginfo);
3858 }
3859
d9f21f6a
RS
3860 unsigned HOST_WIDE_INT vf;
3861 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3862 {
3863 if (dump_enabled_p ())
3864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3865 "not considering SIMD clones; not yet supported"
3866 " for variable-width vectors.\n");
3867 return false;
3868 }
3869
0136f8f0
AH
3870 unsigned int badness = 0;
3871 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3872 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3873 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3874 else
3875 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3876 n = n->simdclone->next_clone)
3877 {
3878 unsigned int this_badness = 0;
d9f21f6a 3879 if (n->simdclone->simdlen > vf
0136f8f0
AH
3880 || n->simdclone->nargs != nargs)
3881 continue;
d9f21f6a
RS
3882 if (n->simdclone->simdlen < vf)
3883 this_badness += (exact_log2 (vf)
0136f8f0
AH
3884 - exact_log2 (n->simdclone->simdlen)) * 1024;
3885 if (n->simdclone->inbranch)
3886 this_badness += 2048;
3887 int target_badness = targetm.simd_clone.usable (n);
3888 if (target_badness < 0)
3889 continue;
3890 this_badness += target_badness * 512;
3891 /* FORNOW: Have to add code to add the mask argument. */
3892 if (n->simdclone->inbranch)
3893 continue;
3894 for (i = 0; i < nargs; i++)
3895 {
3896 switch (n->simdclone->args[i].arg_type)
3897 {
3898 case SIMD_CLONE_ARG_TYPE_VECTOR:
3899 if (!useless_type_conversion_p
3900 (n->simdclone->args[i].orig_type,
3901 TREE_TYPE (gimple_call_arg (stmt, i))))
3902 i = -1;
3903 else if (arginfo[i].dt == vect_constant_def
3904 || arginfo[i].dt == vect_external_def
3905 || arginfo[i].linear_step)
3906 this_badness += 64;
3907 break;
3908 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3909 if (arginfo[i].dt != vect_constant_def
3910 && arginfo[i].dt != vect_external_def)
3911 i = -1;
3912 break;
3913 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3914 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3915 if (arginfo[i].dt == vect_constant_def
3916 || arginfo[i].dt == vect_external_def
3917 || (arginfo[i].linear_step
3918 != n->simdclone->args[i].linear_step))
3919 i = -1;
3920 break;
3921 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3922 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3923 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3927 /* FORNOW */
3928 i = -1;
3929 break;
3930 case SIMD_CLONE_ARG_TYPE_MASK:
3931 gcc_unreachable ();
3932 }
3933 if (i == (size_t) -1)
3934 break;
3935 if (n->simdclone->args[i].alignment > arginfo[i].align)
3936 {
3937 i = -1;
3938 break;
3939 }
3940 if (arginfo[i].align)
3941 this_badness += (exact_log2 (arginfo[i].align)
3942 - exact_log2 (n->simdclone->args[i].alignment));
3943 }
3944 if (i == (size_t) -1)
3945 continue;
3946 if (bestn == NULL || this_badness < badness)
3947 {
3948 bestn = n;
3949 badness = this_badness;
3950 }
3951 }
3952
3953 if (bestn == NULL)
00426f9a 3954 return false;
0136f8f0
AH
3955
3956 for (i = 0; i < nargs; i++)
3957 if ((arginfo[i].dt == vect_constant_def
3958 || arginfo[i].dt == vect_external_def)
3959 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3960 {
3961 arginfo[i].vectype
3962 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3963 i)));
3964 if (arginfo[i].vectype == NULL
cf1b2ba4 3965 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3966 > bestn->simdclone->simdlen))
00426f9a 3967 return false;
0136f8f0
AH
3968 }
3969
3970 fndecl = bestn->decl;
3971 nunits = bestn->simdclone->simdlen;
d9f21f6a 3972 ncopies = vf / nunits;
0136f8f0
AH
3973
3974 /* If the function isn't const, only allow it in simd loops where user
3975 has asserted that at least nunits consecutive iterations can be
3976 performed using SIMD instructions. */
3977 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3978 && gimple_vuse (stmt))
00426f9a 3979 return false;
0136f8f0
AH
3980
3981 /* Sanity check: make sure that at least one copy of the vectorized stmt
3982 needs to be generated. */
3983 gcc_assert (ncopies >= 1);
3984
3985 if (!vec_stmt) /* transformation not required. */
3986 {
6c9e85fb
JJ
3987 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3988 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3989 if ((bestn->simdclone->args[i].arg_type
3990 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3991 || (bestn->simdclone->args[i].arg_type
3992 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3993 {
17b658af 3994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3995 + 1);
3996 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3997 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3998 ? size_type_node : TREE_TYPE (arginfo[i].op);
3999 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4001 tree sll = arginfo[i].simd_lane_linear
4002 ? boolean_true_node : boolean_false_node;
4003 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4004 }
0136f8f0 4005 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
adac3a68 4006 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
68435eb2 4007/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4008 return true;
4009 }
4010
67b8dbac 4011 /* Transform. */
0136f8f0
AH
4012
4013 if (dump_enabled_p ())
4014 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4015
4016 /* Handle def. */
4017 scalar_dest = gimple_call_lhs (stmt);
4018 vec_dest = NULL_TREE;
4019 rtype = NULL_TREE;
4020 ratype = NULL_TREE;
4021 if (scalar_dest)
4022 {
4023 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4024 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4025 if (TREE_CODE (rtype) == ARRAY_TYPE)
4026 {
4027 ratype = rtype;
4028 rtype = TREE_TYPE (ratype);
4029 }
4030 }
4031
4032 prev_stmt_info = NULL;
4033 for (j = 0; j < ncopies; ++j)
4034 {
4035 /* Build argument list for the vectorized call. */
4036 if (j == 0)
4037 vargs.create (nargs);
4038 else
4039 vargs.truncate (0);
4040
4041 for (i = 0; i < nargs; i++)
4042 {
4043 unsigned int k, l, m, o;
4044 tree atype;
4045 op = gimple_call_arg (stmt, i);
4046 switch (bestn->simdclone->args[i].arg_type)
4047 {
4048 case SIMD_CLONE_ARG_TYPE_VECTOR:
4049 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4050 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
4051 for (m = j * o; m < (j + 1) * o; m++)
4052 {
cf1b2ba4
RS
4053 if (simd_clone_subparts (atype)
4054 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4055 {
73a699ae 4056 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4057 k = (simd_clone_subparts (arginfo[i].vectype)
4058 / simd_clone_subparts (atype));
0136f8f0
AH
4059 gcc_assert ((k & (k - 1)) == 0);
4060 if (m == 0)
4061 vec_oprnd0
81c40241 4062 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4063 else
4064 {
4065 vec_oprnd0 = arginfo[i].op;
4066 if ((m & (k - 1)) == 0)
4067 vec_oprnd0
4068 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4069 vec_oprnd0);
4070 }
4071 arginfo[i].op = vec_oprnd0;
4072 vec_oprnd0
4073 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4074 bitsize_int (prec),
0136f8f0
AH
4075 bitsize_int ((m & (k - 1)) * prec));
4076 new_stmt
b731b390 4077 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4078 vec_oprnd0);
4079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4080 vargs.safe_push (gimple_assign_lhs (new_stmt));
4081 }
4082 else
4083 {
cf1b2ba4
RS
4084 k = (simd_clone_subparts (atype)
4085 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4086 gcc_assert ((k & (k - 1)) == 0);
4087 vec<constructor_elt, va_gc> *ctor_elts;
4088 if (k != 1)
4089 vec_alloc (ctor_elts, k);
4090 else
4091 ctor_elts = NULL;
4092 for (l = 0; l < k; l++)
4093 {
4094 if (m == 0 && l == 0)
4095 vec_oprnd0
81c40241 4096 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4097 else
4098 vec_oprnd0
4099 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4100 arginfo[i].op);
4101 arginfo[i].op = vec_oprnd0;
4102 if (k == 1)
4103 break;
4104 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4105 vec_oprnd0);
4106 }
4107 if (k == 1)
4108 vargs.safe_push (vec_oprnd0);
4109 else
4110 {
4111 vec_oprnd0 = build_constructor (atype, ctor_elts);
4112 new_stmt
b731b390 4113 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4114 vec_oprnd0);
4115 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4116 vargs.safe_push (gimple_assign_lhs (new_stmt));
4117 }
4118 }
4119 }
4120 break;
4121 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4122 vargs.safe_push (op);
4123 break;
4124 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4125 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4126 if (j == 0)
4127 {
4128 gimple_seq stmts;
4129 arginfo[i].op
4130 = force_gimple_operand (arginfo[i].op, &stmts, true,
4131 NULL_TREE);
4132 if (stmts != NULL)
4133 {
4134 basic_block new_bb;
4135 edge pe = loop_preheader_edge (loop);
4136 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4137 gcc_assert (!new_bb);
4138 }
17b658af
JJ
4139 if (arginfo[i].simd_lane_linear)
4140 {
4141 vargs.safe_push (arginfo[i].op);
4142 break;
4143 }
b731b390 4144 tree phi_res = copy_ssa_name (op);
538dd0b7 4145 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 4146 set_vinfo_for_stmt (new_phi,
310213d4 4147 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
4148 add_phi_arg (new_phi, arginfo[i].op,
4149 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4150 enum tree_code code
4151 = POINTER_TYPE_P (TREE_TYPE (op))
4152 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4153 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4154 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4155 widest_int cst
4156 = wi::mul (bestn->simdclone->args[i].linear_step,
4157 ncopies * nunits);
4158 tree tcst = wide_int_to_tree (type, cst);
b731b390 4159 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
4160 new_stmt
4161 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4162 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4163 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4164 set_vinfo_for_stmt (new_stmt,
310213d4 4165 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
4166 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4167 UNKNOWN_LOCATION);
4168 arginfo[i].op = phi_res;
4169 vargs.safe_push (phi_res);
4170 }
4171 else
4172 {
4173 enum tree_code code
4174 = POINTER_TYPE_P (TREE_TYPE (op))
4175 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4176 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4177 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4178 widest_int cst
4179 = wi::mul (bestn->simdclone->args[i].linear_step,
4180 j * nunits);
4181 tree tcst = wide_int_to_tree (type, cst);
b731b390 4182 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
4183 new_stmt = gimple_build_assign (new_temp, code,
4184 arginfo[i].op, tcst);
0136f8f0
AH
4185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4186 vargs.safe_push (new_temp);
4187 }
4188 break;
7adb26f2
JJ
4189 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4191 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4195 default:
4196 gcc_unreachable ();
4197 }
4198 }
4199
4200 new_stmt = gimple_build_call_vec (fndecl, vargs);
4201 if (vec_dest)
4202 {
cf1b2ba4 4203 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4204 if (ratype)
b731b390 4205 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4206 else if (simd_clone_subparts (vectype)
4207 == simd_clone_subparts (rtype))
0136f8f0
AH
4208 new_temp = make_ssa_name (vec_dest, new_stmt);
4209 else
4210 new_temp = make_ssa_name (rtype, new_stmt);
4211 gimple_call_set_lhs (new_stmt, new_temp);
4212 }
4213 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4214
4215 if (vec_dest)
4216 {
cf1b2ba4 4217 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4218 {
4219 unsigned int k, l;
73a699ae
RS
4220 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4221 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4222 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4223 gcc_assert ((k & (k - 1)) == 0);
4224 for (l = 0; l < k; l++)
4225 {
4226 tree t;
4227 if (ratype)
4228 {
4229 t = build_fold_addr_expr (new_temp);
4230 t = build2 (MEM_REF, vectype, t,
73a699ae 4231 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4232 }
4233 else
4234 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4235 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 4236 new_stmt
b731b390 4237 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
4238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4239 if (j == 0 && l == 0)
4240 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4241 else
4242 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4243
4244 prev_stmt_info = vinfo_for_stmt (new_stmt);
4245 }
4246
4247 if (ratype)
3ba4ff41 4248 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4249 continue;
4250 }
cf1b2ba4 4251 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4252 {
cf1b2ba4
RS
4253 unsigned int k = (simd_clone_subparts (vectype)
4254 / simd_clone_subparts (rtype));
0136f8f0
AH
4255 gcc_assert ((k & (k - 1)) == 0);
4256 if ((j & (k - 1)) == 0)
4257 vec_alloc (ret_ctor_elts, k);
4258 if (ratype)
4259 {
cf1b2ba4 4260 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4261 for (m = 0; m < o; m++)
4262 {
4263 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4264 size_int (m), NULL_TREE, NULL_TREE);
4265 new_stmt
b731b390 4266 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
4267 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4268 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4269 gimple_assign_lhs (new_stmt));
4270 }
3ba4ff41 4271 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4272 }
4273 else
4274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4275 if ((j & (k - 1)) != k - 1)
4276 continue;
4277 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4278 new_stmt
b731b390 4279 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
4280 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4281
4282 if ((unsigned) j == k - 1)
4283 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4284 else
4285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4286
4287 prev_stmt_info = vinfo_for_stmt (new_stmt);
4288 continue;
4289 }
4290 else if (ratype)
4291 {
4292 tree t = build_fold_addr_expr (new_temp);
4293 t = build2 (MEM_REF, vectype, t,
4294 build_int_cst (TREE_TYPE (t), 0));
4295 new_stmt
b731b390 4296 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0 4297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4298 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4299 }
4300 }
4301
4302 if (j == 0)
4303 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4304 else
4305 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4306
4307 prev_stmt_info = vinfo_for_stmt (new_stmt);
4308 }
4309
4310 vargs.release ();
4311
4312 /* The call in STMT might prevent it from being removed in dce.
4313 We however cannot remove it here, due to the way the ssa name
4314 it defines is mapped to the new definition. So just replace
4315 rhs of the statement with something harmless. */
4316
4317 if (slp_node)
4318 return true;
4319
4320 if (scalar_dest)
4321 {
4322 type = TREE_TYPE (scalar_dest);
4323 if (is_pattern_stmt_p (stmt_info))
4324 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4325 else
4326 lhs = gimple_call_lhs (stmt);
4327 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4328 }
4329 else
4330 new_stmt = gimple_build_nop ();
4331 set_vinfo_for_stmt (new_stmt, stmt_info);
4332 set_vinfo_for_stmt (stmt, NULL);
4333 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4334 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4335 unlink_stmt_vdef (stmt);
4336
4337 return true;
4338}
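
The clone picked above is the candidate with the smallest accumulated "badness". A standalone sketch of that scoring with made-up candidates, mirroring the weights visible in the selection loop (1024 per halving of simdlen below the vectorization factor, 2048 for an inbranch clone, 512 per unit of target badness); the per-argument penalties are left out to keep the sketch short.

#include <stdio.h>

/* A made-up simd-clone candidate, reduced to the fields the scoring uses.  */
struct candidate
{
  const char *name;
  unsigned simdlen;
  int inbranch;
  int target_badness;   /* what targetm.simd_clone.usable would report */
};

static int
ilog2u (unsigned x)
{
  int l = -1;
  while (x)
    {
      l++;
      x >>= 1;
    }
  return l;
}

static unsigned
badness (const struct candidate *c, unsigned vf)
{
  if (c->simdlen > vf || c->target_badness < 0)
    return ~0u;   /* candidate rejected outright */
  unsigned b = 0;
  if (c->simdlen < vf)
    b += (ilog2u (vf) - ilog2u (c->simdlen)) * 1024;
  if (c->inbranch)
    b += 2048;
  b += (unsigned) c->target_badness * 512;
  return b;
}

int
main (void)
{
  const unsigned vf = 8;
  struct candidate cands[] = {
    { "clone_simdlen4",  4, 0, 0 },
    { "clone_simdlen8",  8, 0, 1 },
    { "clone_inbranch8", 8, 1, 0 },
  };
  for (unsigned i = 0; i < sizeof cands / sizeof cands[0]; i++)
    printf ("%-16s badness %u\n", cands[i].name, badness (&cands[i], vf));
  return 0;
}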
4339
4340
ebfd146a
IR
4341/* Function vect_gen_widened_results_half
4342
4343 Create a vector stmt whose code, number of arguments, and result
b8698a0f 4344 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 4345 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
4346 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4347 needs to be created (DECL is a function-decl of a target-builtin).
4348 STMT is the original scalar stmt that we are vectorizing. */
4349
355fe088 4350static gimple *
ebfd146a
IR
4351vect_gen_widened_results_half (enum tree_code code,
4352 tree decl,
4353 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4354 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4355 gimple *stmt)
b8698a0f 4356{
355fe088 4357 gimple *new_stmt;
b8698a0f
L
4358 tree new_temp;
4359
4360 /* Generate half of the widened result: */
4361 if (code == CALL_EXPR)
4362 {
4363 /* Target specific support */
ebfd146a
IR
4364 if (op_type == binary_op)
4365 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4366 else
4367 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4368 new_temp = make_ssa_name (vec_dest, new_stmt);
4369 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4370 }
4371 else
ebfd146a 4372 {
b8698a0f
L
4373 /* Generic support */
4374 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4375 if (op_type != binary_op)
4376 vec_oprnd1 = NULL;
0d0e4a03 4377 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4378 new_temp = make_ssa_name (vec_dest, new_stmt);
4379 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4380 }
ebfd146a
IR
4381 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4382
ebfd146a
IR
4383 return new_stmt;
4384}
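
vect_gen_widened_results_half is called twice per input pair because a widening operation on N narrow elements yields two vectors of N/2 wide elements, one per half of the input. A scalar sketch using a widening multiply; the element counts, and which elements land in the "low" half, are assumptions for the example (the real split is target- and endian-dependent).

#include <stdint.h>
#include <stdio.h>

#define N 8   /* narrow elements per input vector (assumed) */

/* The two "halves" the helper above is invoked twice to produce,
   here for a widening multiply int16 x int16 -> int32.  */
static void
widen_mult_lo (const int16_t *a, const int16_t *b, int32_t *out)
{
  for (int i = 0; i < N / 2; i++)
    out[i] = (int32_t) a[i] * b[i];
}

static void
widen_mult_hi (const int16_t *a, const int16_t *b, int32_t *out)
{
  for (int i = 0; i < N / 2; i++)
    out[i] = (int32_t) a[N / 2 + i] * b[N / 2 + i];
}

int
main (void)
{
  int16_t a[N] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int16_t b[N] = { 10, 10, 10, 10, 1000, 1000, 1000, 1000 };
  int32_t lo[N / 2], hi[N / 2];

  widen_mult_lo (a, b, lo);
  widen_mult_hi (a, b, hi);
  for (int i = 0; i < N / 2; i++)
    printf ("lo[%d] = %d   hi[%d] = %d\n", i, lo[i], i, hi[i]);
  return 0;
}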
4385
4a00c761
JJ
4386
4387/* Get vectorized definitions for loop-based vectorization. For the first
4388 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4389 the scalar operand), and for the rest we get a copy with
4390 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4391 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4392 The vectors are collected into VEC_OPRNDS. */
4393
4394static void
355fe088 4395vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4396 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4397{
4398 tree vec_oprnd;
4399
4400 /* Get first vector operand. */
4401 /* All the vector operands except the very first one (that is scalar oprnd)
4402 are stmt copies. */
4403 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4404 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4405 else
4406 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4407
9771b263 4408 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4409
4410 /* Get second vector operand. */
4411 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4412 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4413
4414 *oprnd = vec_oprnd;
4415
4416 /* For conversion in multiple steps, continue to get operands
4417 recursively. */
4418 if (multi_step_cvt)
4419 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4420}
4421
4422
4423/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4424 For multi-step conversions store the resulting vectors and call the function
4425 recursively. */
4426
4427static void
9771b263 4428vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4429 int multi_step_cvt, gimple *stmt,
9771b263 4430 vec<tree> vec_dsts,
4a00c761
JJ
4431 gimple_stmt_iterator *gsi,
4432 slp_tree slp_node, enum tree_code code,
4433 stmt_vec_info *prev_stmt_info)
4434{
4435 unsigned int i;
4436 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4437 gimple *new_stmt;
4a00c761
JJ
4438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4439
9771b263 4440 vec_dest = vec_dsts.pop ();
4a00c761 4441
9771b263 4442 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4443 {
4444 /* Create demotion operation. */
9771b263
DN
4445 vop0 = (*vec_oprnds)[i];
4446 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4447 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4448 new_tmp = make_ssa_name (vec_dest, new_stmt);
4449 gimple_assign_set_lhs (new_stmt, new_tmp);
4450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4451
4452 if (multi_step_cvt)
4453 /* Store the resulting vector for next recursive call. */
9771b263 4454 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4455 else
4456 {
4457 /* This is the last step of the conversion sequence. Store the
4458 vectors in SLP_NODE or in vector info of the scalar statement
4459 (or in STMT_VINFO_RELATED_STMT chain). */
4460 if (slp_node)
9771b263 4461 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4462 else
c689ce1e
RB
4463 {
4464 if (!*prev_stmt_info)
4465 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4466 else
4467 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4468
c689ce1e
RB
4469 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4470 }
4a00c761
JJ
4471 }
4472 }
4473
4474 /* For multi-step demotion operations we first generate demotion operations
4475 from the source type to the intermediate types, and then combine the
4476 results (stored in VEC_OPRNDS) in demotion operation to the destination
4477 type. */
4478 if (multi_step_cvt)
4479 {
4480 /* At each level of recursion we have half of the operands we had at the
4481 previous level. */
9771b263 4482 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4483 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4484 stmt, vec_dsts, gsi, slp_node,
4485 VEC_PACK_TRUNC_EXPR,
4486 prev_stmt_info);
4487 }
4488
9771b263 4489 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4490}
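
Each level of the recursion above packs pairs of vectors into one narrower vector, so the operand count halves until the destination type is reached. A scalar sketch of a two-step demotion from int64 through int32 to int16, with the element counts assumed for the example:

#include <stdint.h>
#include <stdio.h>

#define N 2   /* elements per widest vector (assumed) */

int
main (void)
{
  /* Four int64 vectors -> two int32 vectors -> one int16 vector,
     pairing the operands two at a time as the loop over VEC_OPRNDS does.  */
  int64_t v64[4][N] = { { 1, 2 }, { 3, 4 }, { 5, 6 }, { 7, 8 } };
  int32_t v32[2][2 * N];
  int16_t v16[4 * N];

  for (int p = 0; p < 2; p++)         /* first level: 4 vectors -> 2 */
    for (int i = 0; i < N; i++)
      {
        v32[p][i] = (int32_t) v64[2 * p][i];
        v32[p][N + i] = (int32_t) v64[2 * p + 1][i];
      }

  for (int i = 0; i < 2 * N; i++)     /* second level: 2 vectors -> 1 */
    {
      v16[i] = (int16_t) v32[0][i];
      v16[2 * N + i] = (int16_t) v32[1][i];
    }

  for (int i = 0; i < 4 * N; i++)
    printf ("%d ", v16[i]);
  printf ("\n");
  return 0;
}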
4491
4492
4493/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4494 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4495 the resulting vectors and call the function recursively. */
4496
4497static void
9771b263
DN
4498vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4499 vec<tree> *vec_oprnds1,
355fe088 4500 gimple *stmt, tree vec_dest,
4a00c761
JJ
4501 gimple_stmt_iterator *gsi,
4502 enum tree_code code1,
4503 enum tree_code code2, tree decl1,
4504 tree decl2, int op_type)
4505{
4506 int i;
4507 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4508 gimple *new_stmt1, *new_stmt2;
6e1aa848 4509 vec<tree> vec_tmp = vNULL;
4a00c761 4510
9771b263
DN
4511 vec_tmp.create (vec_oprnds0->length () * 2);
4512 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4513 {
4514 if (op_type == binary_op)
9771b263 4515 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4516 else
4517 vop1 = NULL_TREE;
4518
4519 /* Generate the two halves of promotion operation. */
4520 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4521 op_type, vec_dest, gsi, stmt);
4522 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4523 op_type, vec_dest, gsi, stmt);
4524 if (is_gimple_call (new_stmt1))
4525 {
4526 new_tmp1 = gimple_call_lhs (new_stmt1);
4527 new_tmp2 = gimple_call_lhs (new_stmt2);
4528 }
4529 else
4530 {
4531 new_tmp1 = gimple_assign_lhs (new_stmt1);
4532 new_tmp2 = gimple_assign_lhs (new_stmt2);
4533 }
4534
4535 /* Store the results for the next step. */
9771b263
DN
4536 vec_tmp.quick_push (new_tmp1);
4537 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4538 }
4539
689eaba3 4540 vec_oprnds0->release ();
4a00c761
JJ
4541 *vec_oprnds0 = vec_tmp;
4542}
4543
4544
b8698a0f
L
4545/* Check if STMT performs a conversion operation that can be vectorized.
4546 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4547 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4548 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4549
4550static bool
355fe088 4551vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
4552 gimple **vec_stmt, slp_tree slp_node,
4553 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4554{
4555 tree vec_dest;
4556 tree scalar_dest;
4a00c761 4557 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4558 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4561 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4562 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4563 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4564 tree new_temp;
ebfd146a 4565 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4566 int ndts = 2;
355fe088 4567 gimple *new_stmt = NULL;
ebfd146a 4568 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4569 poly_uint64 nunits_in;
4570 poly_uint64 nunits_out;
ebfd146a 4571 tree vectype_out, vectype_in;
4a00c761
JJ
4572 int ncopies, i, j;
4573 tree lhs_type, rhs_type;
ebfd146a 4574 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4575 vec<tree> vec_oprnds0 = vNULL;
4576 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4577 tree vop0;
4a00c761 4578 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4579 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4580 int multi_step_cvt = 0;
6e1aa848 4581 vec<tree> interm_types = vNULL;
4a00c761
JJ
4582 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4583 int op_type;
4a00c761 4584 unsigned short fltsz;
ebfd146a
IR
4585
4586 /* Is STMT a vectorizable conversion? */
4587
4a00c761 4588 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4589 return false;
4590
66c16fd9
RB
4591 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4592 && ! vec_stmt)
ebfd146a
IR
4593 return false;
4594
4595 if (!is_gimple_assign (stmt))
4596 return false;
4597
4598 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4599 return false;
4600
4601 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4602 if (!CONVERT_EXPR_CODE_P (code)
4603 && code != FIX_TRUNC_EXPR
4604 && code != FLOAT_EXPR
4605 && code != WIDEN_MULT_EXPR
4606 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4607 return false;
4608
4a00c761
JJ
4609 op_type = TREE_CODE_LENGTH (code);
4610
ebfd146a 4611 /* Check types of lhs and rhs. */
b690cc0f 4612 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4613 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4614 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4615
ebfd146a
IR
4616 op0 = gimple_assign_rhs1 (stmt);
4617 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4618
4619 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4620 && !((INTEGRAL_TYPE_P (lhs_type)
4621 && INTEGRAL_TYPE_P (rhs_type))
4622 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4623 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4624 return false;
4625
e6f5c25d
IE
4626 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4627 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4628 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4629 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4630 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4631 {
73fbfcad 4632 if (dump_enabled_p ())
78c60e3d 4633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4634 "type conversion to/from bit-precision unsupported."
4635 "\n");
4a00c761
JJ
4636 return false;
4637 }
4638
b690cc0f 4639 /* Check the operands of the operation. */
894dd753 4640 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4641 {
73fbfcad 4642 if (dump_enabled_p ())
78c60e3d 4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4644 "use not simple.\n");
b690cc0f
RG
4645 return false;
4646 }
4a00c761
JJ
4647 if (op_type == binary_op)
4648 {
4649 bool ok;
4650
4651 op1 = gimple_assign_rhs2 (stmt);
4652 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4653 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4654 OP1. */
4655 if (CONSTANT_CLASS_P (op0))
894dd753 4656 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4657 else
894dd753 4658 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4659
4660 if (!ok)
4661 {
73fbfcad 4662 if (dump_enabled_p ())
78c60e3d 4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4664 "use not simple.\n");
4a00c761
JJ
4665 return false;
4666 }
4667 }
4668
b690cc0f
RG
4669 /* If op0 is an external or constant defs use a vector type of
4670 the same size as the output vector type. */
ebfd146a 4671 if (!vectype_in)
b690cc0f 4672 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4673 if (vec_stmt)
4674 gcc_assert (vectype_in);
4675 if (!vectype_in)
4676 {
73fbfcad 4677 if (dump_enabled_p ())
4a00c761 4678 {
78c60e3d
SS
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4680 "no vectype for scalar type ");
4681 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4682 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4683 }
7d8930a0
IR
4684
4685 return false;
4686 }
ebfd146a 4687
e6f5c25d
IE
4688 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4689 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4690 {
4691 if (dump_enabled_p ())
4692 {
4693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4694 "can't convert between boolean and non "
4695 "boolean vectors");
4696 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4697 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4698 }
4699
4700 return false;
4701 }
4702
b690cc0f
RG
4703 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4704 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4705 if (known_eq (nunits_out, nunits_in))
ebfd146a 4706 modifier = NONE;
062d5ccc
RS
4707 else if (multiple_p (nunits_out, nunits_in))
4708 modifier = NARROW;
ebfd146a 4709 else
062d5ccc
RS
4710 {
4711 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4712 modifier = WIDEN;
4713 }
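   /* Illustrative examples, not taken from the original sources (128-bit
      vector types are an assumption): for "int = (int) short" we might have
      vectype_out = V4SI and vectype_in = V8HI, so nunits_in is a multiple of
      nunits_out and the modifier is WIDEN; for "short = (short) int" the
      element counts swap and the modifier is NARROW; for "float = (float) int"
      both vectors have four elements and the modifier is NONE.  */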
ebfd146a 4714
ff802fa1
IR
4715 /* Multiple types in SLP are handled by creating the appropriate number of
4716 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4717 case of SLP. */
fce57248 4718 if (slp_node)
ebfd146a 4719 ncopies = 1;
4a00c761 4720 else if (modifier == NARROW)
e8f142e2 4721 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4722 else
e8f142e2 4723 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4724
ebfd146a
IR
4725 /* Sanity check: make sure that at least one copy of the vectorized stmt
4726 needs to be generated. */
4727 gcc_assert (ncopies >= 1);
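   /* Illustrative arithmetic (hypothetical numbers): with a vectorization
      factor of 8 and V4SI vectors, vect_get_num_copies returns 8 / 4 = 2, so
      two copies of each vectorized conversion stmt are emitted; for NARROW
      the count is based on vectype_out, otherwise on vectype_in, as computed
      above.  */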
4728
16d22000
RS
4729 bool found_mode = false;
4730 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4731 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4732 opt_scalar_mode rhs_mode_iter;
b397965c 4733
ebfd146a 4734 /* Supportable by target? */
4a00c761 4735 switch (modifier)
ebfd146a 4736 {
4a00c761
JJ
4737 case NONE:
4738 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4739 return false;
4740 if (supportable_convert_operation (code, vectype_out, vectype_in,
4741 &decl1, &code1))
4742 break;
4743 /* FALLTHRU */
4744 unsupported:
73fbfcad 4745 if (dump_enabled_p ())
78c60e3d 4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4747 "conversion not supported by target.\n");
ebfd146a 4748 return false;
ebfd146a 4749
4a00c761
JJ
4750 case WIDEN:
4751 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4752 &code1, &code2, &multi_step_cvt,
4753 &interm_types))
4a00c761
JJ
4754 {
4755 /* Binary widening operation can only be supported directly by the
4756 architecture. */
4757 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4758 break;
4759 }
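      /* Illustrative example (assuming the target provides the optabs): a
	 widening multiply "int_res = (int) short_a * (int) short_b" is
	 handled here with a code pair such as VEC_WIDEN_MULT_LO_EXPR /
	 VEC_WIDEN_MULT_HI_EXPR, each consuming V8HI operands and producing a
	 V4SI result, so no multi-step intermediate conversion is involved,
	 which is what the assertion above guarantees for binary_op.  */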
4760
4761 if (code != FLOAT_EXPR
b397965c 4762 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4763 goto unsupported;
4764
b397965c 4765 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4766 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4767 {
16d22000 4768 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4769 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4770 break;
4771
4a00c761
JJ
4772 cvt_type
4773 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4774 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4775 if (cvt_type == NULL_TREE)
4776 goto unsupported;
4777
4778 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4779 {
4780 if (!supportable_convert_operation (code, vectype_out,
4781 cvt_type, &decl1, &codecvt1))
4782 goto unsupported;
4783 }
4784 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4785 cvt_type, &codecvt1,
4786 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4787 &interm_types))
4788 continue;
4789 else
4790 gcc_assert (multi_step_cvt == 0);
4791
4792 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4793 vectype_in, &code1, &code2,
4794 &multi_step_cvt, &interm_types))
16d22000
RS
4795 {
4796 found_mode = true;
4797 break;
4798 }
4a00c761
JJ
4799 }
4800
16d22000 4801 if (!found_mode)
4a00c761
JJ
4802 goto unsupported;
4803
4804 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4805 codecvt2 = ERROR_MARK;
4806 else
4807 {
4808 multi_step_cvt++;
9771b263 4809 interm_types.safe_push (cvt_type);
4a00c761
JJ
4810 cvt_type = NULL_TREE;
4811 }
4812 break;
4813
4814 case NARROW:
4815 gcc_assert (op_type == unary_op);
4816 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4817 &code1, &multi_step_cvt,
4818 &interm_types))
4819 break;
4820
4821 if (code != FIX_TRUNC_EXPR
b397965c 4822 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4823 goto unsupported;
4824
4a00c761
JJ
4825 cvt_type
4826 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4827 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4828 if (cvt_type == NULL_TREE)
4829 goto unsupported;
4830 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4831 &decl1, &codecvt1))
4832 goto unsupported;
4833 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4834 &code1, &multi_step_cvt,
4835 &interm_types))
4836 break;
4837 goto unsupported;
4838
4839 default:
4840 gcc_unreachable ();
ebfd146a
IR
4841 }
4842
4843 if (!vec_stmt) /* transformation not required. */
4844 {
adac3a68 4845 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4846 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4847 {
4848 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4849 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4850 cost_vec);
8bd37302 4851 }
4a00c761
JJ
4852 else if (modifier == NARROW)
4853 {
4854 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4855 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4856 cost_vec);
4a00c761
JJ
4857 }
4858 else
4859 {
4860 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4861 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4862 cost_vec);
4a00c761 4863 }
9771b263 4864 interm_types.release ();
ebfd146a
IR
4865 return true;
4866 }
4867
67b8dbac 4868 /* Transform. */
73fbfcad 4869 if (dump_enabled_p ())
78c60e3d 4870 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4871 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4872
4a00c761
JJ
4873 if (op_type == binary_op)
4874 {
4875 if (CONSTANT_CLASS_P (op0))
4876 op0 = fold_convert (TREE_TYPE (op1), op0);
4877 else if (CONSTANT_CLASS_P (op1))
4878 op1 = fold_convert (TREE_TYPE (op0), op1);
4879 }
4880
4881 /* In case of multi-step conversion, we first generate conversion operations
4882 to the intermediate types, and then from those types to the final one.
4883 We create vector destinations for the intermediate types (TYPES) received
4884 from supportable_*_operation, and store them in the correct order
4885 for future use in vect_create_vectorized_*_stmts (). */
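  /* For instance (illustrative, assuming such a target): a
     "double = (double) short" conversion may be performed as an integral
     widening step (short -> int or short -> long) followed by an
     integer-to-float conversion; the intermediate integer vector type is
     what ends up in INTERM_TYPES and receives its own vector destination
     below.  */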
8c681247 4886 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4887 vec_dest = vect_create_destination_var (scalar_dest,
4888 (cvt_type && modifier == WIDEN)
4889 ? cvt_type : vectype_out);
9771b263 4890 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4891
4892 if (multi_step_cvt)
4893 {
9771b263
DN
4894 for (i = interm_types.length () - 1;
4895 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4896 {
4897 vec_dest = vect_create_destination_var (scalar_dest,
4898 intermediate_type);
9771b263 4899 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4900 }
4901 }
ebfd146a 4902
4a00c761 4903 if (cvt_type)
82294ec1
JJ
4904 vec_dest = vect_create_destination_var (scalar_dest,
4905 modifier == WIDEN
4906 ? vectype_out : cvt_type);
4a00c761
JJ
4907
4908 if (!slp_node)
4909 {
30862efc 4910 if (modifier == WIDEN)
4a00c761 4911 {
c3284718 4912 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4913 if (op_type == binary_op)
9771b263 4914 vec_oprnds1.create (1);
4a00c761 4915 }
30862efc 4916 else if (modifier == NARROW)
9771b263
DN
4917 vec_oprnds0.create (
4918 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4919 }
4920 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4921 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4922
4a00c761 4923 last_oprnd = op0;
ebfd146a
IR
4924 prev_stmt_info = NULL;
4925 switch (modifier)
4926 {
4927 case NONE:
4928 for (j = 0; j < ncopies; j++)
4929 {
ebfd146a 4930 if (j == 0)
306b0c92 4931 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4932 else
4933 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4934
9771b263 4935 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4936 {
4937 /* Arguments are ready, create the new vector stmt. */
4938 if (code1 == CALL_EXPR)
4939 {
4940 new_stmt = gimple_build_call (decl1, 1, vop0);
4941 new_temp = make_ssa_name (vec_dest, new_stmt);
4942 gimple_call_set_lhs (new_stmt, new_temp);
4943 }
4944 else
4945 {
4946 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4947 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4948 new_temp = make_ssa_name (vec_dest, new_stmt);
4949 gimple_assign_set_lhs (new_stmt, new_temp);
4950 }
4951
4952 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4953 if (slp_node)
9771b263 4954 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4955 else
4956 {
4957 if (!prev_stmt_info)
4958 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4959 else
4960 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4961 prev_stmt_info = vinfo_for_stmt (new_stmt);
4962 }
4a00c761 4963 }
ebfd146a
IR
4964 }
4965 break;
4966
4967 case WIDEN:
4968 /* In case the vectorization factor (VF) is bigger than the number
4969 of elements that we can fit in a vectype (nunits), we have to
4970 generate more than one vector stmt, i.e., we need to "unroll"
4971 the vector stmt by a factor VF/nunits. */
4972 for (j = 0; j < ncopies; j++)
4973 {
4a00c761 4974 /* Handle uses. */
ebfd146a 4975 if (j == 0)
4a00c761
JJ
4976 {
4977 if (slp_node)
4978 {
4979 if (code == WIDEN_LSHIFT_EXPR)
4980 {
4981 unsigned int k;
ebfd146a 4982
4a00c761
JJ
4983 vec_oprnd1 = op1;
4984 /* Store vec_oprnd1 for every vector stmt to be created
4985 for SLP_NODE. We check during the analysis that all
4986 the shift arguments are the same. */
4987 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4988 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4989
4990 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4991 slp_node);
4a00c761
JJ
4992 }
4993 else
4994 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4995 &vec_oprnds1, slp_node);
4a00c761
JJ
4996 }
4997 else
4998 {
81c40241 4999 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5000 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5001 if (op_type == binary_op)
5002 {
5003 if (code == WIDEN_LSHIFT_EXPR)
5004 vec_oprnd1 = op1;
5005 else
81c40241 5006 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5007 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5008 }
5009 }
5010 }
ebfd146a 5011 else
4a00c761
JJ
5012 {
5013 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5014 vec_oprnds0.truncate (0);
5015 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5016 if (op_type == binary_op)
5017 {
5018 if (code == WIDEN_LSHIFT_EXPR)
5019 vec_oprnd1 = op1;
5020 else
5021 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5022 vec_oprnd1);
9771b263
DN
5023 vec_oprnds1.truncate (0);
5024 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5025 }
5026 }
ebfd146a 5027
4a00c761
JJ
5028 /* Arguments are ready. Create the new vector stmts. */
5029 for (i = multi_step_cvt; i >= 0; i--)
5030 {
9771b263 5031 tree this_dest = vec_dsts[i];
4a00c761
JJ
5032 enum tree_code c1 = code1, c2 = code2;
5033 if (i == 0 && codecvt2 != ERROR_MARK)
5034 {
5035 c1 = codecvt1;
5036 c2 = codecvt2;
5037 }
5038 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5039 &vec_oprnds1,
5040 stmt, this_dest, gsi,
5041 c1, c2, decl1, decl2,
5042 op_type);
5043 }
5044
9771b263 5045 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5046 {
5047 if (cvt_type)
5048 {
5049 if (codecvt1 == CALL_EXPR)
5050 {
5051 new_stmt = gimple_build_call (decl1, 1, vop0);
5052 new_temp = make_ssa_name (vec_dest, new_stmt);
5053 gimple_call_set_lhs (new_stmt, new_temp);
5054 }
5055 else
5056 {
5057 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5058 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5059 new_stmt = gimple_build_assign (new_temp, codecvt1,
5060 vop0);
4a00c761
JJ
5061 }
5062
5063 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5064 }
5065 else
5066 new_stmt = SSA_NAME_DEF_STMT (vop0);
5067
5068 if (slp_node)
9771b263 5069 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 5070 else
c689ce1e
RB
5071 {
5072 if (!prev_stmt_info)
5073 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5074 else
5075 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5076 prev_stmt_info = vinfo_for_stmt (new_stmt);
5077 }
4a00c761 5078 }
ebfd146a 5079 }
4a00c761
JJ
5080
5081 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5082 break;
5083
5084 case NARROW:
5085 /* In case the vectorization factor (VF) is bigger than the number
5086 of elements that we can fit in a vectype (nunits), we have to
5087 generate more than one vector stmt, i.e., we need to "unroll"
5088 the vector stmt by a factor VF/nunits. */
5089 for (j = 0; j < ncopies; j++)
5090 {
5091 /* Handle uses. */
4a00c761
JJ
5092 if (slp_node)
5093 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5094 slp_node);
ebfd146a
IR
5095 else
5096 {
9771b263 5097 vec_oprnds0.truncate (0);
4a00c761
JJ
5098 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5099 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5100 }
5101
4a00c761
JJ
5102 /* Arguments are ready. Create the new vector stmts. */
5103 if (cvt_type)
9771b263 5104 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5105 {
5106 if (codecvt1 == CALL_EXPR)
5107 {
5108 new_stmt = gimple_build_call (decl1, 1, vop0);
5109 new_temp = make_ssa_name (vec_dest, new_stmt);
5110 gimple_call_set_lhs (new_stmt, new_temp);
5111 }
5112 else
5113 {
5114 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5115 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5116 new_stmt = gimple_build_assign (new_temp, codecvt1,
5117 vop0);
4a00c761 5118 }
ebfd146a 5119
4a00c761 5120 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 5121 vec_oprnds0[i] = new_temp;
4a00c761 5122 }
ebfd146a 5123
4a00c761
JJ
5124 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5125 stmt, vec_dsts, gsi,
5126 slp_node, code1,
5127 &prev_stmt_info);
ebfd146a
IR
5128 }
5129
5130 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5131 break;
ebfd146a
IR
5132 }
5133
9771b263
DN
5134 vec_oprnds0.release ();
5135 vec_oprnds1.release ();
9771b263 5136 interm_types.release ();
ebfd146a
IR
5137
5138 return true;
5139}
ff802fa1
IR
5140
5141
ebfd146a
IR
5142/* Function vectorizable_assignment.
5143
b8698a0f
L
5144 Check if STMT performs an assignment (copy) that can be vectorized.
5145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5148
5149static bool
355fe088 5150vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5151 gimple **vec_stmt, slp_tree slp_node,
5152 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5153{
5154 tree vec_dest;
5155 tree scalar_dest;
5156 tree op;
5157 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5158 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5159 tree new_temp;
4fc5ebf1
JG
5160 enum vect_def_type dt[1] = {vect_unknown_def_type};
5161 int ndts = 1;
ebfd146a 5162 int ncopies;
f18b55bd 5163 int i, j;
6e1aa848 5164 vec<tree> vec_oprnds = vNULL;
ebfd146a 5165 tree vop;
a70d6342 5166 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5167 vec_info *vinfo = stmt_info->vinfo;
355fe088 5168 gimple *new_stmt = NULL;
f18b55bd 5169 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5170 enum tree_code code;
5171 tree vectype_in;
ebfd146a 5172
a70d6342 5173 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5174 return false;
5175
66c16fd9
RB
5176 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5177 && ! vec_stmt)
ebfd146a
IR
5178 return false;
5179
5180 /* Is vectorizable assignment? */
5181 if (!is_gimple_assign (stmt))
5182 return false;
5183
5184 scalar_dest = gimple_assign_lhs (stmt);
5185 if (TREE_CODE (scalar_dest) != SSA_NAME)
5186 return false;
5187
fde9c428 5188 code = gimple_assign_rhs_code (stmt);
ebfd146a 5189 if (gimple_assign_single_p (stmt)
fde9c428
RG
5190 || code == PAREN_EXPR
5191 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5192 op = gimple_assign_rhs1 (stmt);
5193 else
5194 return false;
5195
7b7ec6c5
RG
5196 if (code == VIEW_CONVERT_EXPR)
5197 op = TREE_OPERAND (op, 0);
5198
465c8c19 5199 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5200 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5201
5202 /* Multiple types in SLP are handled by creating the appropriate number of
5203 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5204 case of SLP. */
fce57248 5205 if (slp_node)
465c8c19
JJ
5206 ncopies = 1;
5207 else
e8f142e2 5208 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5209
5210 gcc_assert (ncopies >= 1);
5211
894dd753 5212 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5213 {
73fbfcad 5214 if (dump_enabled_p ())
78c60e3d 5215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5216 "use not simple.\n");
ebfd146a
IR
5217 return false;
5218 }
5219
fde9c428
RG
5220 /* We can handle NOP_EXPR conversions that do not change the number
5221 of elements or the vector size. */
7b7ec6c5
RG
5222 if ((CONVERT_EXPR_CODE_P (code)
5223 || code == VIEW_CONVERT_EXPR)
fde9c428 5224 && (!vectype_in
928686b1 5225 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5226 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5227 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5228 return false;
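  /* Illustrative example (assumed V4SI vectypes): "unsigned int = (unsigned
     int) int" keeps both the element count and the vector size and is
     handled here as a plain copy (via VIEW_CONVERT_EXPR in the transform
     phase below); an int -> short conversion changes the element count and
     is rejected above, leaving it to vectorizable_conversion.  */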
5229
7b7b1813
RG
5230 /* We do not handle bit-precision changes. */
5231 if ((CONVERT_EXPR_CODE_P (code)
5232 || code == VIEW_CONVERT_EXPR)
5233 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5234 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5235 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5236 /* But a conversion that does not change the bit-pattern is ok. */
5237 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5238 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5239 && TYPE_UNSIGNED (TREE_TYPE (op)))
5240 /* Conversion between boolean types of different sizes is
5241 a simple assignment in case their vectypes are the same
5242 boolean vectors. */
5243 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5244 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5245 {
73fbfcad 5246 if (dump_enabled_p ())
78c60e3d
SS
5247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5248 "type conversion to/from bit-precision "
e645e942 5249 "unsupported.\n");
7b7b1813
RG
5250 return false;
5251 }
5252
ebfd146a
IR
5253 if (!vec_stmt) /* transformation not required. */
5254 {
5255 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5256 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5257 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5258 return true;
5259 }
5260
67b8dbac 5261 /* Transform. */
73fbfcad 5262 if (dump_enabled_p ())
e645e942 5263 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5264
5265 /* Handle def. */
5266 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5267
5268 /* Handle use. */
f18b55bd 5269 for (j = 0; j < ncopies; j++)
ebfd146a 5270 {
f18b55bd
IR
5271 /* Handle uses. */
5272 if (j == 0)
306b0c92 5273 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5274 else
5275 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5276
5277 /* Arguments are ready. Create the new vector stmt. */
9771b263 5278 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5279 {
7b7ec6c5
RG
5280 if (CONVERT_EXPR_CODE_P (code)
5281 || code == VIEW_CONVERT_EXPR)
4a73490d 5282 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
5283 new_stmt = gimple_build_assign (vec_dest, vop);
5284 new_temp = make_ssa_name (vec_dest, new_stmt);
5285 gimple_assign_set_lhs (new_stmt, new_temp);
5286 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5287 if (slp_node)
9771b263 5288 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 5289 }
ebfd146a
IR
5290
5291 if (slp_node)
f18b55bd
IR
5292 continue;
5293
5294 if (j == 0)
5295 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5296 else
5297 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5298
5299 prev_stmt_info = vinfo_for_stmt (new_stmt);
5300 }
b8698a0f 5301
9771b263 5302 vec_oprnds.release ();
ebfd146a
IR
5303 return true;
5304}
5305
9dc3f7de 5306
1107f3ae
IR
5307/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5308 either as shift by a scalar or by a vector. */
5309
5310bool
5311vect_supportable_shift (enum tree_code code, tree scalar_type)
5312{
5313
ef4bddc2 5314 machine_mode vec_mode;
1107f3ae
IR
5315 optab optab;
5316 int icode;
5317 tree vectype;
5318
5319 vectype = get_vectype_for_scalar_type (scalar_type);
5320 if (!vectype)
5321 return false;
5322
5323 optab = optab_for_tree_code (code, vectype, optab_scalar);
5324 if (!optab
5325 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5326 {
5327 optab = optab_for_tree_code (code, vectype, optab_vector);
5328 if (!optab
5329 || (optab_handler (optab, TYPE_MODE (vectype))
5330 == CODE_FOR_nothing))
5331 return false;
5332 }
5333
5334 vec_mode = TYPE_MODE (vectype);
5335 icode = (int) optab_handler (optab, vec_mode);
5336 if (icode == CODE_FOR_nothing)
5337 return false;
5338
5339 return true;
5340}
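/* Illustrative use (hypothetical caller, not part of this file): pattern
   recognition code can ask whether a shift of a given scalar type is
   vectorizable at all before building a shifted pattern stmt, e.g.

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd)))
       ... build the pattern stmt containing the shift ...

   The query succeeds when get_vectype_for_scalar_type finds a vector type
   and either the scalar-shift (optab_scalar) or the vector-shift
   (optab_vector) optab has a handler for its mode.  */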
5341
5342
9dc3f7de
IR
5343/* Function vectorizable_shift.
5344
5345 Check if STMT performs a shift operation that can be vectorized.
5346 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5347 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5348 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5349
5350static bool
355fe088 5351vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5352 gimple **vec_stmt, slp_tree slp_node,
5353 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5354{
5355 tree vec_dest;
5356 tree scalar_dest;
5357 tree op0, op1 = NULL;
5358 tree vec_oprnd1 = NULL_TREE;
5359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5360 tree vectype;
5361 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5362 enum tree_code code;
ef4bddc2 5363 machine_mode vec_mode;
9dc3f7de
IR
5364 tree new_temp;
5365 optab optab;
5366 int icode;
ef4bddc2 5367 machine_mode optab_op2_mode;
9dc3f7de 5368 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5369 int ndts = 2;
355fe088 5370 gimple *new_stmt = NULL;
9dc3f7de 5371 stmt_vec_info prev_stmt_info;
928686b1
RS
5372 poly_uint64 nunits_in;
5373 poly_uint64 nunits_out;
9dc3f7de 5374 tree vectype_out;
cede2577 5375 tree op1_vectype;
9dc3f7de
IR
5376 int ncopies;
5377 int j, i;
6e1aa848
DN
5378 vec<tree> vec_oprnds0 = vNULL;
5379 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5380 tree vop0, vop1;
5381 unsigned int k;
49eab32e 5382 bool scalar_shift_arg = true;
9dc3f7de 5383 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5384 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5385
5386 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5387 return false;
5388
66c16fd9
RB
5389 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5390 && ! vec_stmt)
9dc3f7de
IR
5391 return false;
5392
5393 /* Is STMT a vectorizable binary/unary operation? */
5394 if (!is_gimple_assign (stmt))
5395 return false;
5396
5397 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5398 return false;
5399
5400 code = gimple_assign_rhs_code (stmt);
5401
5402 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5403 || code == RROTATE_EXPR))
5404 return false;
5405
5406 scalar_dest = gimple_assign_lhs (stmt);
5407 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5408 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5409 {
73fbfcad 5410 if (dump_enabled_p ())
78c60e3d 5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5412 "bit-precision shifts not supported.\n");
7b7b1813
RG
5413 return false;
5414 }
9dc3f7de
IR
5415
5416 op0 = gimple_assign_rhs1 (stmt);
894dd753 5417 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5418 {
73fbfcad 5419 if (dump_enabled_p ())
78c60e3d 5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5421 "use not simple.\n");
9dc3f7de
IR
5422 return false;
5423 }
5424 /* If op0 is an external or constant def use a vector type with
5425 the same size as the output vector type. */
5426 if (!vectype)
5427 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5428 if (vec_stmt)
5429 gcc_assert (vectype);
5430 if (!vectype)
5431 {
73fbfcad 5432 if (dump_enabled_p ())
78c60e3d 5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5434 "no vectype for scalar type\n");
9dc3f7de
IR
5435 return false;
5436 }
5437
5438 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5439 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5440 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5441 return false;
5442
5443 op1 = gimple_assign_rhs2 (stmt);
894dd753 5444 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype))
9dc3f7de 5445 {
73fbfcad 5446 if (dump_enabled_p ())
78c60e3d 5447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5448 "use not simple.\n");
9dc3f7de
IR
5449 return false;
5450 }
5451
9dc3f7de
IR
5452 /* Multiple types in SLP are handled by creating the appropriate number of
5453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5454 case of SLP. */
fce57248 5455 if (slp_node)
9dc3f7de
IR
5456 ncopies = 1;
5457 else
e8f142e2 5458 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5459
5460 gcc_assert (ncopies >= 1);
5461
5462 /* Determine whether the shift amount is a vector or a scalar.  If the
5463 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5464
dbfa87aa
YR
5465 if ((dt[1] == vect_internal_def
5466 || dt[1] == vect_induction_def)
5467 && !slp_node)
49eab32e
JJ
5468 scalar_shift_arg = false;
5469 else if (dt[1] == vect_constant_def
5470 || dt[1] == vect_external_def
5471 || dt[1] == vect_internal_def)
5472 {
5473 /* In SLP, we need to check whether the shift count is the same for
5474 all statements; in loops, if it is a constant or invariant, it is
5475 always a scalar shift. */
5476 if (slp_node)
5477 {
355fe088
TS
5478 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5479 gimple *slpstmt;
49eab32e 5480
9771b263 5481 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5482 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5483 scalar_shift_arg = false;
5484 }
60d393e8
RB
5485
5486 /* If the shift amount is computed by a pattern stmt, we cannot
5487 use the scalar amount directly, so give up and use a vector
5488 shift. */
5489 if (dt[1] == vect_internal_def)
5490 {
5491 gimple *def = SSA_NAME_DEF_STMT (op1);
5492 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5493 scalar_shift_arg = false;
5494 }
49eab32e
JJ
5495 }
5496 else
5497 {
73fbfcad 5498 if (dump_enabled_p ())
78c60e3d 5499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5500 "operand mode requires invariant argument.\n");
49eab32e
JJ
5501 return false;
5502 }
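  /* Illustrative distinction (hypothetical statements): for "x[i] << 3" the
     shift amount is invariant, SCALAR_SHIFT_ARG stays true and the
     vector-by-scalar optab (optab_scalar) is tried first below; for
     "x[i] << y[i]" the amount has a vect_internal_def definition,
     SCALAR_SHIFT_ARG is cleared and the vector-by-vector optab
     (optab_vector) must be used instead.  */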
5503
9dc3f7de 5504 /* Vector shifted by vector. */
49eab32e 5505 if (!scalar_shift_arg)
9dc3f7de
IR
5506 {
5507 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5508 if (dump_enabled_p ())
78c60e3d 5509 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5510 "vector/vector shift/rotate found.\n");
78c60e3d 5511
aa948027
JJ
5512 if (!op1_vectype)
5513 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5514 if (op1_vectype == NULL_TREE
5515 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5516 {
73fbfcad 5517 if (dump_enabled_p ())
78c60e3d
SS
5518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5519 "unusable type for last operand in"
e645e942 5520 " vector/vector shift/rotate.\n");
cede2577
JJ
5521 return false;
5522 }
9dc3f7de
IR
5523 }
5524 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5525 see if it has a vector-shifted-by-vector insn. */
49eab32e 5526 else
9dc3f7de
IR
5527 {
5528 optab = optab_for_tree_code (code, vectype, optab_scalar);
5529 if (optab
5530 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5531 {
73fbfcad 5532 if (dump_enabled_p ())
78c60e3d 5533 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5534 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5535 }
5536 else
5537 {
5538 optab = optab_for_tree_code (code, vectype, optab_vector);
5539 if (optab
5540 && (optab_handler (optab, TYPE_MODE (vectype))
5541 != CODE_FOR_nothing))
5542 {
49eab32e
JJ
5543 scalar_shift_arg = false;
5544
73fbfcad 5545 if (dump_enabled_p ())
78c60e3d 5546 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5547 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5548
5549 /* Unlike the other binary operators, shifts/rotates have
5550 the rhs being int, instead of the same type as the lhs,
5551 so make sure the scalar is the right type if we are
aa948027 5552 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5553 if (dt[1] == vect_constant_def)
5554 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5555 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5556 TREE_TYPE (op1)))
5557 {
5558 if (slp_node
5559 && TYPE_MODE (TREE_TYPE (vectype))
5560 != TYPE_MODE (TREE_TYPE (op1)))
5561 {
73fbfcad 5562 if (dump_enabled_p ())
78c60e3d
SS
5563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5564 "unusable type for last operand in"
e645e942 5565 " vector/vector shift/rotate.\n");
21c0a521 5566 return false;
aa948027
JJ
5567 }
5568 if (vec_stmt && !slp_node)
5569 {
5570 op1 = fold_convert (TREE_TYPE (vectype), op1);
5571 op1 = vect_init_vector (stmt, op1,
5572 TREE_TYPE (vectype), NULL);
5573 }
5574 }
9dc3f7de
IR
5575 }
5576 }
5577 }
9dc3f7de
IR
5578
5579 /* Supportable by target? */
5580 if (!optab)
5581 {
73fbfcad 5582 if (dump_enabled_p ())
78c60e3d 5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5584 "no optab.\n");
9dc3f7de
IR
5585 return false;
5586 }
5587 vec_mode = TYPE_MODE (vectype);
5588 icode = (int) optab_handler (optab, vec_mode);
5589 if (icode == CODE_FOR_nothing)
5590 {
73fbfcad 5591 if (dump_enabled_p ())
78c60e3d 5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5593 "op not supported by target.\n");
9dc3f7de 5594 /* Check only during analysis. */
cf098191 5595 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5596 || (!vec_stmt
5597 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5598 return false;
73fbfcad 5599 if (dump_enabled_p ())
e645e942
TJ
5600 dump_printf_loc (MSG_NOTE, vect_location,
5601 "proceeding using word mode.\n");
9dc3f7de
IR
5602 }
5603
5604 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5605 if (!vec_stmt
5606 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5607 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5608 {
73fbfcad 5609 if (dump_enabled_p ())
78c60e3d 5610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5611 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5612 return false;
5613 }
5614
5615 if (!vec_stmt) /* transformation not required. */
5616 {
5617 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5618 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5619 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5620 return true;
5621 }
5622
67b8dbac 5623 /* Transform. */
9dc3f7de 5624
73fbfcad 5625 if (dump_enabled_p ())
78c60e3d 5626 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5627 "transform binary/unary operation.\n");
9dc3f7de
IR
5628
5629 /* Handle def. */
5630 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5631
9dc3f7de
IR
5632 prev_stmt_info = NULL;
5633 for (j = 0; j < ncopies; j++)
5634 {
5635 /* Handle uses. */
5636 if (j == 0)
5637 {
5638 if (scalar_shift_arg)
5639 {
5640 /* Vector shl and shr insn patterns can be defined with scalar
5641 operand 2 (shift operand). In this case, use constant or loop
5642 invariant op1 directly, without extending it to vector mode
5643 first. */
5644 optab_op2_mode = insn_data[icode].operand[2].mode;
5645 if (!VECTOR_MODE_P (optab_op2_mode))
5646 {
73fbfcad 5647 if (dump_enabled_p ())
78c60e3d 5648 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5649 "operand 1 using scalar mode.\n");
9dc3f7de 5650 vec_oprnd1 = op1;
8930f723 5651 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5652 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5653 if (slp_node)
5654 {
5655 /* Store vec_oprnd1 for every vector stmt to be created
5656 for SLP_NODE. We check during the analysis that all
5657 the shift arguments are the same.
5658 TODO: Allow different constants for different vector
5659 stmts generated for an SLP instance. */
5660 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5661 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5662 }
5663 }
5664 }
5665
5666 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5667 (a special case for certain kinds of vector shifts); otherwise,
5668 operand 1 should be of a vector type (the usual case). */
5669 if (vec_oprnd1)
5670 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5671 slp_node);
9dc3f7de
IR
5672 else
5673 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5674 slp_node);
9dc3f7de
IR
5675 }
5676 else
5677 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5678
5679 /* Arguments are ready. Create the new vector stmt. */
9771b263 5680 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5681 {
9771b263 5682 vop1 = vec_oprnds1[i];
0d0e4a03 5683 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5684 new_temp = make_ssa_name (vec_dest, new_stmt);
5685 gimple_assign_set_lhs (new_stmt, new_temp);
5686 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5687 if (slp_node)
9771b263 5688 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5689 }
5690
5691 if (slp_node)
5692 continue;
5693
5694 if (j == 0)
5695 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5696 else
5697 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5698 prev_stmt_info = vinfo_for_stmt (new_stmt);
5699 }
5700
9771b263
DN
5701 vec_oprnds0.release ();
5702 vec_oprnds1.release ();
9dc3f7de
IR
5703
5704 return true;
5705}
5706
5707
ebfd146a
IR
5708/* Function vectorizable_operation.
5709
16949072
RG
5710 Check if STMT performs a binary, unary or ternary operation that can
5711 be vectorized.
b8698a0f 5712 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5713 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5714 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5715
5716static bool
355fe088 5717vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5718 gimple **vec_stmt, slp_tree slp_node,
5719 stmt_vector_for_cost *cost_vec)
ebfd146a 5720{
00f07b86 5721 tree vec_dest;
ebfd146a 5722 tree scalar_dest;
16949072 5723 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5725 tree vectype;
ebfd146a 5726 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5727 enum tree_code code, orig_code;
ef4bddc2 5728 machine_mode vec_mode;
ebfd146a
IR
5729 tree new_temp;
5730 int op_type;
00f07b86 5731 optab optab;
523ba738 5732 bool target_support_p;
16949072
RG
5733 enum vect_def_type dt[3]
5734 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5735 int ndts = 3;
355fe088 5736 gimple *new_stmt = NULL;
ebfd146a 5737 stmt_vec_info prev_stmt_info;
928686b1
RS
5738 poly_uint64 nunits_in;
5739 poly_uint64 nunits_out;
ebfd146a
IR
5740 tree vectype_out;
5741 int ncopies;
5742 int j, i;
6e1aa848
DN
5743 vec<tree> vec_oprnds0 = vNULL;
5744 vec<tree> vec_oprnds1 = vNULL;
5745 vec<tree> vec_oprnds2 = vNULL;
16949072 5746 tree vop0, vop1, vop2;
a70d6342 5747 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5748 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5749
a70d6342 5750 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5751 return false;
5752
66c16fd9
RB
5753 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5754 && ! vec_stmt)
ebfd146a
IR
5755 return false;
5756
5757 /* Is STMT a vectorizable binary/unary operation? */
5758 if (!is_gimple_assign (stmt))
5759 return false;
5760
5761 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5762 return false;
5763
0eb952ea 5764 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5765
1af4ebf5
MG
5766 /* For pointer addition and subtraction, we should use the normal
5767 plus and minus for the vector operation. */
ebfd146a
IR
5768 if (code == POINTER_PLUS_EXPR)
5769 code = PLUS_EXPR;
1af4ebf5
MG
5770 if (code == POINTER_DIFF_EXPR)
5771 code = MINUS_EXPR;
ebfd146a
IR
5772
5773 /* Support only unary or binary operations. */
5774 op_type = TREE_CODE_LENGTH (code);
16949072 5775 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5776 {
73fbfcad 5777 if (dump_enabled_p ())
78c60e3d 5778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5779 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5780 op_type);
ebfd146a
IR
5781 return false;
5782 }
5783
b690cc0f
RG
5784 scalar_dest = gimple_assign_lhs (stmt);
5785 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5786
7b7b1813
RG
5787 /* Most operations cannot handle bit-precision types without extra
5788 truncations. */
045c1278 5789 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5790 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5791 /* The exceptions are bitwise binary operations. */
5792 && code != BIT_IOR_EXPR
5793 && code != BIT_XOR_EXPR
5794 && code != BIT_AND_EXPR)
5795 {
73fbfcad 5796 if (dump_enabled_p ())
78c60e3d 5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5798 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5799 return false;
5800 }
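  /* Illustrative case (assumed bit-field operands): a PLUS_EXPR on 3-bit
     bit-field values would need an extra truncation after the vector add
     and is rejected above, whereas BIT_AND_EXPR, BIT_IOR_EXPR and
     BIT_XOR_EXPR preserve the narrow precision without extra work and are
     allowed through.  */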
5801
ebfd146a 5802 op0 = gimple_assign_rhs1 (stmt);
894dd753 5803 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5804 {
73fbfcad 5805 if (dump_enabled_p ())
78c60e3d 5806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5807 "use not simple.\n");
ebfd146a
IR
5808 return false;
5809 }
b690cc0f
RG
5810 /* If op0 is an external or constant def use a vector type with
5811 the same size as the output vector type. */
5812 if (!vectype)
b036c6c5
IE
5813 {
5814 /* For a boolean type we cannot determine the vectype from an
5815 invariant value (we don't know whether it is a vector
5816 of booleans or a vector of integers). We use the output
5817 vectype because operations on booleans don't change the
5818 type. */
2568d8a1 5819 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5820 {
2568d8a1 5821 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5822 {
5823 if (dump_enabled_p ())
5824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5825 "not supported operation on bool value.\n");
5826 return false;
5827 }
5828 vectype = vectype_out;
5829 }
5830 else
5831 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5832 }
7d8930a0
IR
5833 if (vec_stmt)
5834 gcc_assert (vectype);
5835 if (!vectype)
5836 {
73fbfcad 5837 if (dump_enabled_p ())
7d8930a0 5838 {
78c60e3d
SS
5839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5840 "no vectype for scalar type ");
5841 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5842 TREE_TYPE (op0));
e645e942 5843 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5844 }
5845
5846 return false;
5847 }
b690cc0f
RG
5848
5849 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5850 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5851 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5852 return false;
ebfd146a 5853
16949072 5854 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5855 {
5856 op1 = gimple_assign_rhs2 (stmt);
894dd753 5857 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5858 {
73fbfcad 5859 if (dump_enabled_p ())
78c60e3d 5860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5861 "use not simple.\n");
ebfd146a
IR
5862 return false;
5863 }
5864 }
16949072
RG
5865 if (op_type == ternary_op)
5866 {
5867 op2 = gimple_assign_rhs3 (stmt);
894dd753 5868 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5869 {
73fbfcad 5870 if (dump_enabled_p ())
78c60e3d 5871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5872 "use not simple.\n");
16949072
RG
5873 return false;
5874 }
5875 }
ebfd146a 5876
b690cc0f 5877 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5878 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5879 case of SLP. */
fce57248 5880 if (slp_node)
b690cc0f
RG
5881 ncopies = 1;
5882 else
e8f142e2 5883 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5884
5885 gcc_assert (ncopies >= 1);
5886
9dc3f7de 5887 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5888 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5889 || code == RROTATE_EXPR)
9dc3f7de 5890 return false;
ebfd146a 5891
ebfd146a 5892 /* Supportable by target? */
00f07b86
RH
5893
5894 vec_mode = TYPE_MODE (vectype);
5895 if (code == MULT_HIGHPART_EXPR)
523ba738 5896 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5897 else
5898 {
5899 optab = optab_for_tree_code (code, vectype, optab_default);
5900 if (!optab)
5deb57cb 5901 {
73fbfcad 5902 if (dump_enabled_p ())
78c60e3d 5903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5904 "no optab.\n");
00f07b86 5905 return false;
5deb57cb 5906 }
523ba738
RS
5907 target_support_p = (optab_handler (optab, vec_mode)
5908 != CODE_FOR_nothing);
5deb57cb
JJ
5909 }
5910
523ba738 5911 if (!target_support_p)
ebfd146a 5912 {
73fbfcad 5913 if (dump_enabled_p ())
78c60e3d 5914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5915 "op not supported by target.\n");
ebfd146a 5916 /* Check only during analysis. */
cf098191 5917 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5918 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5919 return false;
73fbfcad 5920 if (dump_enabled_p ())
e645e942
TJ
5921 dump_printf_loc (MSG_NOTE, vect_location,
5922 "proceeding using word mode.\n");
383d9c83
IR
5923 }
5924
4a00c761 5925 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5926 if (!VECTOR_MODE_P (vec_mode)
5927 && !vec_stmt
ca09abcb 5928 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5929 {
73fbfcad 5930 if (dump_enabled_p ())
78c60e3d 5931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5932 "not worthwhile without SIMD support.\n");
e34842c6 5933 return false;
7d8930a0 5934 }
ebfd146a 5935
ebfd146a
IR
5936 if (!vec_stmt) /* transformation not required. */
5937 {
4a00c761 5938 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5939 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5940 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5941 return true;
5942 }
5943
67b8dbac 5944 /* Transform. */
ebfd146a 5945
73fbfcad 5946 if (dump_enabled_p ())
78c60e3d 5947 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5948 "transform binary/unary operation.\n");
383d9c83 5949
0eb952ea
JJ
5950 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5951 vectors with unsigned elements, but the result is signed. So, we
5952 need to compute the MINUS_EXPR into a vectype temporary and
5953 VIEW_CONVERT_EXPR it into the final vectype_out result. */
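  /* For instance (illustrative GIMPLE): for "d = p - q" with pointer
     operands the generated sequence is roughly

       tmp_v = vect_p MINUS_EXPR vect_q;                 <- unsigned-element vectype
       d_v   = VIEW_CONVERT_EXPR <vectype_out> (tmp_v);  <- signed result

     which is why a separate VEC_CVT_DEST destination is created just
     below.  */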
5954 tree vec_cvt_dest = NULL_TREE;
5955 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
5956 {
5957 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5958 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5959 }
5960 /* Handle def. */
5961 else
5962 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 5963
ebfd146a
IR
5964 /* In case the vectorization factor (VF) is bigger than the number
5965 of elements that we can fit in a vectype (nunits), we have to generate
5966 more than one vector stmt, i.e., we need to "unroll" the
4a00c761
JJ
5967 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5968 from one copy of the vector stmt to the next, in the field
5969 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5970 stages to find the correct vector defs to be used when vectorizing
5971 stmts that use the defs of the current stmt. The example below
5972 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5973 we need to create 4 vectorized stmts):
5974
5975 before vectorization:
5976 RELATED_STMT VEC_STMT
5977 S1: x = memref - -
5978 S2: z = x + 1 - -
5979
5980 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5981 there):
5982 RELATED_STMT VEC_STMT
5983 VS1_0: vx0 = memref0 VS1_1 -
5984 VS1_1: vx1 = memref1 VS1_2 -
5985 VS1_2: vx2 = memref2 VS1_3 -
5986 VS1_3: vx3 = memref3 - -
5987 S1: x = load - VS1_0
5988 S2: z = x + 1 - -
5989
5990 step 2: vectorize stmt S2 (done here):
5991 To vectorize stmt S2 we first need to find the relevant vector
5992 def for the first operand 'x'. This is, as usual, obtained from
5993 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5994 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5995 relevant vector def 'vx0'. Having found 'vx0' we can generate
5996 the vector stmt VS2_0, and as usual, record it in the
5997 STMT_VINFO_VEC_STMT of stmt S2.
5998 When creating the second copy (VS2_1), we obtain the relevant vector
5999 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6000 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6001 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6002 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6003 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6004 chain of stmts and pointers:
6005 RELATED_STMT VEC_STMT
6006 VS1_0: vx0 = memref0 VS1_1 -
6007 VS1_1: vx1 = memref1 VS1_2 -
6008 VS1_2: vx2 = memref2 VS1_3 -
6009 VS1_3: vx3 = memref3 - -
6010 S1: x = load - VS1_0
6011 VS2_0: vz0 = vx0 + v1 VS2_1 -
6012 VS2_1: vz1 = vx1 + v1 VS2_2 -
6013 VS2_2: vz2 = vx2 + v1 VS2_3 -
6014 VS2_3: vz3 = vx3 + v1 - -
6015 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6016
6017 prev_stmt_info = NULL;
6018 for (j = 0; j < ncopies; j++)
6019 {
6020 /* Handle uses. */
6021 if (j == 0)
4a00c761 6022 {
d6476f90 6023 if (op_type == binary_op)
4a00c761 6024 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6025 slp_node);
d6476f90
RB
6026 else if (op_type == ternary_op)
6027 {
6028 if (slp_node)
6029 {
6030 auto_vec<tree> ops(3);
6031 ops.quick_push (op0);
6032 ops.quick_push (op1);
6033 ops.quick_push (op2);
6034 auto_vec<vec<tree> > vec_defs(3);
6035 vect_get_slp_defs (ops, slp_node, &vec_defs);
6036 vec_oprnds0 = vec_defs[0];
6037 vec_oprnds1 = vec_defs[1];
6038 vec_oprnds2 = vec_defs[2];
6039 }
6040 else
6041 {
6042 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6043 NULL);
6044 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6045 NULL);
6046 }
6047 }
4a00c761
JJ
6048 else
6049 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6050 slp_node);
4a00c761 6051 }
ebfd146a 6052 else
4a00c761
JJ
6053 {
6054 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6055 if (op_type == ternary_op)
6056 {
9771b263
DN
6057 tree vec_oprnd = vec_oprnds2.pop ();
6058 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6059 vec_oprnd));
4a00c761
JJ
6060 }
6061 }
6062
6063 /* Arguments are ready. Create the new vector stmt. */
9771b263 6064 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6065 {
4a00c761 6066 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6067 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6068 vop2 = ((op_type == ternary_op)
9771b263 6069 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 6070 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
6071 new_temp = make_ssa_name (vec_dest, new_stmt);
6072 gimple_assign_set_lhs (new_stmt, new_temp);
6073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6074 if (vec_cvt_dest)
6075 {
6076 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6077 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6078 new_temp);
6079 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6080 gimple_assign_set_lhs (new_stmt, new_temp);
6081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6082 }
4a00c761 6083 if (slp_node)
9771b263 6084 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
6085 }
6086
4a00c761
JJ
6087 if (slp_node)
6088 continue;
6089
6090 if (j == 0)
6091 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6092 else
6093 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6094 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
6095 }
6096
9771b263
DN
6097 vec_oprnds0.release ();
6098 vec_oprnds1.release ();
6099 vec_oprnds2.release ();
ebfd146a 6100
ebfd146a
IR
6101 return true;
6102}
6103
f702e7d4 6104/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
6105
6106static void
f702e7d4 6107ensure_base_align (struct data_reference *dr)
c716e67f 6108{
ca823c85 6109 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6110 return;
6111
52639a61 6112 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6113 {
52639a61 6114 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6115
f702e7d4
RS
6116 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6117
428f0c67 6118 if (decl_in_symtab_p (base_decl))
f702e7d4 6119 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6120 else
6121 {
f702e7d4 6122 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6123 DECL_USER_ALIGN (base_decl) = 1;
6124 }
52639a61 6125 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6126 }
6127}
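/* Illustrative effect (hypothetical numbers): if the vectorizer planned
   for a 32-byte target alignment of DR but its base declaration is only
   8-byte aligned, the declaration's alignment is raised to 32 bytes,
   through the symbol table for global/static decls and directly via
   SET_DECL_ALIGN otherwise, so the aligned accesses generated later are
   valid.  */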
6128
ebfd146a 6129
44fc7854
BE
6130/* Function get_group_alias_ptr_type.
6131
6132 Return the alias type for the group starting at FIRST_STMT. */
6133
6134static tree
6135get_group_alias_ptr_type (gimple *first_stmt)
6136{
6137 struct data_reference *first_dr, *next_dr;
6138 gimple *next_stmt;
6139
6140 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6141 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
44fc7854
BE
6142 while (next_stmt)
6143 {
6144 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6145 if (get_alias_set (DR_REF (first_dr))
6146 != get_alias_set (DR_REF (next_dr)))
6147 {
6148 if (dump_enabled_p ())
6149 dump_printf_loc (MSG_NOTE, vect_location,
6150 "conflicting alias set types.\n");
6151 return ptr_type_node;
6152 }
2c53b149 6153 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
44fc7854
BE
6154 }
6155 return reference_alias_ptr_type (DR_REF (first_dr));
6156}
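/* Illustrative outcome (hypothetical group): if every member of an
   interleaved group is referenced through "int *", the common "int *"
   alias pointer type is returned; if one member uses a conflicting alias
   set (say "float *"), ptr_type_node is returned instead, which
   conservatively aliases everything.  */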
6157
6158
ebfd146a
IR
6159/* Function vectorizable_store.
6160
b8698a0f
L
6161 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
6162 can be vectorized.
6163 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6164 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6165 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6166
6167static bool
355fe088 6168vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 6169 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 6170{
ebfd146a
IR
6171 tree data_ref;
6172 tree op;
6173 tree vec_oprnd = NULL_TREE;
6174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6175 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6176 tree elem_type;
ebfd146a 6177 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6178 struct loop *loop = NULL;
ef4bddc2 6179 machine_mode vec_mode;
ebfd146a
IR
6180 tree dummy;
6181 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6182 enum vect_def_type rhs_dt = vect_unknown_def_type;
6183 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6184 stmt_vec_info prev_stmt_info = NULL;
6185 tree dataref_ptr = NULL_TREE;
74bf76ed 6186 tree dataref_offset = NULL_TREE;
355fe088 6187 gimple *ptr_incr = NULL;
ebfd146a
IR
6188 int ncopies;
6189 int j;
2de001ee
RS
6190 gimple *next_stmt, *first_stmt;
6191 bool grouped_store;
ebfd146a 6192 unsigned int group_size, i;
6e1aa848
DN
6193 vec<tree> oprnds = vNULL;
6194 vec<tree> result_chain = vNULL;
ebfd146a 6195 bool inv_p;
09dfa495 6196 tree offset = NULL_TREE;
6e1aa848 6197 vec<tree> vec_oprnds = vNULL;
ebfd146a 6198 bool slp = (slp_node != NULL);
ebfd146a 6199 unsigned int vec_num;
a70d6342 6200 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6201 vec_info *vinfo = stmt_info->vinfo;
272c6793 6202 tree aggr_type;
134c85ca 6203 gather_scatter_info gs_info;
355fe088 6204 gimple *new_stmt;
d9f21f6a 6205 poly_uint64 vf;
2de001ee 6206 vec_load_store_type vls_type;
44fc7854 6207 tree ref_type;
a70d6342 6208
a70d6342 6209 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6210 return false;
6211
66c16fd9
RB
6212 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6213 && ! vec_stmt)
ebfd146a
IR
6214 return false;
6215
6216 /* Is vectorizable store? */
6217
c3a8f964
RS
6218 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6219 if (is_gimple_assign (stmt))
6220 {
6221 tree scalar_dest = gimple_assign_lhs (stmt);
6222 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6223 && is_pattern_stmt_p (stmt_info))
6224 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6225 if (TREE_CODE (scalar_dest) != ARRAY_REF
6226 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6227 && TREE_CODE (scalar_dest) != INDIRECT_REF
6228 && TREE_CODE (scalar_dest) != COMPONENT_REF
6229 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6230 && TREE_CODE (scalar_dest) != REALPART_EXPR
6231 && TREE_CODE (scalar_dest) != MEM_REF)
6232 return false;
6233 }
6234 else
6235 {
6236 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6237 if (!call || !gimple_call_internal_p (call))
6238 return false;
6239
6240 internal_fn ifn = gimple_call_internal_fn (call);
6241 if (!internal_store_fn_p (ifn))
c3a8f964 6242 return false;
ebfd146a 6243
c3a8f964
RS
6244 if (slp_node != NULL)
6245 {
6246 if (dump_enabled_p ())
6247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6248 "SLP of masked stores not supported.\n");
6249 return false;
6250 }
6251
f307441a
RS
6252 int mask_index = internal_fn_mask_index (ifn);
6253 if (mask_index >= 0)
6254 {
6255 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6256 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6257 &mask_vectype))
f307441a
RS
6258 return false;
6259 }
c3a8f964
RS
6260 }
6261
6262 op = vect_get_store_rhs (stmt);
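  /* Illustrative inputs accepted above (hypothetical GIMPLE): a plain
     assignment store such as "MEM[(int *)ptr_3] = val_5;" and an internal
     function store such as ".MASK_STORE (ptr_3, align, mask_7, val_5);"
     both reach this point, with OP set to the stored value (val_5) in
     either case by vect_get_store_rhs.  */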
ebfd146a 6263
fce57248
RS
6264 /* Cannot have hybrid store SLP -- that would mean storing to the
6265 same location twice. */
6266 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6267
f4d09712 6268 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6269 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6270
6271 if (loop_vinfo)
b17dc4d4
RB
6272 {
6273 loop = LOOP_VINFO_LOOP (loop_vinfo);
6274 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6275 }
6276 else
6277 vf = 1;
465c8c19
JJ
6278
6279 /* Multiple types in SLP are handled by creating the appropriate number of
6280 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6281 case of SLP. */
fce57248 6282 if (slp)
465c8c19
JJ
6283 ncopies = 1;
6284 else
e8f142e2 6285 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6286
6287 gcc_assert (ncopies >= 1);
6288
6289 /* FORNOW. This restriction should be relaxed. */
6290 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6291 {
6292 if (dump_enabled_p ())
6293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6294 "multiple types in nested loop.\n");
6295 return false;
6296 }
6297
929b4411 6298 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6299 return false;
6300
272c6793 6301 elem_type = TREE_TYPE (vectype);
ebfd146a 6302 vec_mode = TYPE_MODE (vectype);
7b7b1813 6303
ebfd146a
IR
6304 if (!STMT_VINFO_DATA_REF (stmt_info))
6305 return false;
6306
2de001ee 6307 vect_memory_access_type memory_access_type;
7e11fc7f 6308 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6309 &memory_access_type, &gs_info))
6310 return false;
3bab6342 6311
c3a8f964
RS
6312 if (mask)
6313 {
7e11fc7f
RS
6314 if (memory_access_type == VMAT_CONTIGUOUS)
6315 {
6316 if (!VECTOR_MODE_P (vec_mode)
6317 || !can_vec_mask_load_store_p (vec_mode,
6318 TYPE_MODE (mask_vectype), false))
6319 return false;
6320 }
f307441a
RS
6321 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6322 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6323 {
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6326 "unsupported access type for masked store.\n");
6327 return false;
6328 }
c3a8f964
RS
6329 }
6330 else
6331 {
 6332	      /* FORNOW.  In some cases we can vectorize even if the data type is
 6333		 not supported (e.g. array initialization with 0).  */
6334 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6335 return false;
6336 }
6337
f307441a 6338 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6339 && memory_access_type != VMAT_GATHER_SCATTER
6340 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6341 if (grouped_store)
6342 {
2c53b149 6343 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7cfb4d93 6344 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6345 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7cfb4d93
RS
6346 }
6347 else
6348 {
6349 first_stmt = stmt;
6350 first_dr = dr;
6351 group_size = vec_num = 1;
6352 }
6353
ebfd146a
IR
6354 if (!vec_stmt) /* transformation not required. */
6355 {
2de001ee 6356 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6357
6358 if (loop_vinfo
6359 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6360 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6361 memory_access_type, &gs_info);
7cfb4d93 6362
ebfd146a 6363 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6364 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6365 vls_type, slp_node, cost_vec);
ebfd146a
IR
6366 return true;
6367 }
2de001ee 6368 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6369
67b8dbac 6370 /* Transform. */
ebfd146a 6371
f702e7d4 6372 ensure_base_align (dr);
c716e67f 6373
f307441a 6374 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6375 {
c3a8f964 6376 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6377 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6378 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6379 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6380 edge pe = loop_preheader_edge (loop);
6381 gimple_seq seq;
6382 basic_block new_bb;
6383 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6384 poly_uint64 scatter_off_nunits
6385 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6386
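	  /* The data vector and the offset vector need not have the same
	     number of elements.  WIDEN below means the offsets have twice as
	     many elements as the data, so one offset vector covers two
	     scatter calls and PERM_MASK moves its upper half into place on
	     odd copies; NARROW means the data vector is twice as wide, so it
	     is written with two calls (NCOPIES is doubled) and PERM_MASK
	     extracts its upper half instead.  */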
4d694b27 6387 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6388 modifier = NONE;
4d694b27 6389 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6390 {
3bab6342
AT
6391 modifier = WIDEN;
6392
4d694b27
RS
6393 /* Currently gathers and scatters are only supported for
6394 fixed-length vectors. */
6395 unsigned int count = scatter_off_nunits.to_constant ();
6396 vec_perm_builder sel (count, count, 1);
6397 for (i = 0; i < (unsigned int) count; ++i)
6398 sel.quick_push (i | (count / 2));
3bab6342 6399
4d694b27 6400 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6401 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6402 indices);
3bab6342
AT
6403 gcc_assert (perm_mask != NULL_TREE);
6404 }
4d694b27 6405 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6406 {
3bab6342
AT
6407 modifier = NARROW;
6408
4d694b27
RS
6409 /* Currently gathers and scatters are only supported for
6410 fixed-length vectors. */
6411 unsigned int count = nunits.to_constant ();
6412 vec_perm_builder sel (count, count, 1);
6413 for (i = 0; i < (unsigned int) count; ++i)
6414 sel.quick_push (i | (count / 2));
3bab6342 6415
4d694b27 6416 vec_perm_indices indices (sel, 2, count);
e3342de4 6417 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6418 gcc_assert (perm_mask != NULL_TREE);
6419 ncopies *= 2;
6420 }
6421 else
6422 gcc_unreachable ();
6423
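      /* The scatter builtin is taken to have a prototype of the form
	 void (base_ptr, mask, index, src, scale); peel the argument types
	 off TYPE_ARG_TYPES in that order (the assert below checks for an
	 integer mask type and a void return type).  */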
134c85ca 6424 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6425 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6426 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6427 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6428 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6429 scaletype = TREE_VALUE (arglist);
6430
6431 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6432 && TREE_CODE (rettype) == VOID_TYPE);
6433
134c85ca 6434 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6435 if (!is_gimple_min_invariant (ptr))
6436 {
6437 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6438 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6439 gcc_assert (!new_bb);
6440 }
6441
6442 /* Currently we support only unconditional scatter stores,
6443 so mask should be all ones. */
6444 mask = build_int_cst (masktype, -1);
6445 mask = vect_init_vector (stmt, mask, masktype, NULL);
6446
134c85ca 6447 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6448
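      /* Emit one scatter call per copy.  SRC is the vectorized rhs and OP
	 the offset vector for this copy; both are VIEW_CONVERT_EXPRed below
	 to the builtin's source and index argument types when those
	 differ.  */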
6449 prev_stmt_info = NULL;
6450 for (j = 0; j < ncopies; ++j)
6451 {
6452 if (j == 0)
6453 {
6454 src = vec_oprnd1
c3a8f964 6455 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6456 op = vec_oprnd0
134c85ca 6457 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6458 }
6459 else if (modifier != NONE && (j & 1))
6460 {
6461 if (modifier == WIDEN)
6462 {
6463 src = vec_oprnd1
929b4411 6464 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6465 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6466 stmt, gsi);
6467 }
6468 else if (modifier == NARROW)
6469 {
6470 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6471 stmt, gsi);
6472 op = vec_oprnd0
134c85ca
RS
6473 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6474 vec_oprnd0);
3bab6342
AT
6475 }
6476 else
6477 gcc_unreachable ();
6478 }
6479 else
6480 {
6481 src = vec_oprnd1
929b4411 6482 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6483 op = vec_oprnd0
134c85ca
RS
6484 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6485 vec_oprnd0);
3bab6342
AT
6486 }
6487
6488 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6489 {
928686b1
RS
6490 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6491 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6492 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6493 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6494 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6496 src = var;
6497 }
6498
6499 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6500 {
928686b1
RS
6501 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6502 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6503 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6504 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6505 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6507 op = var;
6508 }
6509
6510 new_stmt
134c85ca 6511 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6512
6513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6514
6515 if (prev_stmt_info == NULL)
6516 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6517 else
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6519 prev_stmt_info = vinfo_for_stmt (new_stmt);
6520 }
6521 return true;
6522 }
6523
f307441a 6524 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6525 {
2c53b149
RB
6526 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6527 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
f307441a 6528 }
ebfd146a 6529
f307441a
RS
6530 if (grouped_store)
6531 {
ebfd146a 6532 /* FORNOW */
a70d6342 6533 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6534
6535 /* We vectorize all the stmts of the interleaving group when we
6536 reach the last stmt in the group. */
2c53b149
RB
6537 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6538 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6539 && !slp)
6540 {
6541 *vec_stmt = NULL;
6542 return true;
6543 }
6544
6545 if (slp)
4b5caab7 6546 {
0d0293ac 6547 grouped_store = false;
4b5caab7
IR
6548 /* VEC_NUM is the number of vect stmts to be created for this
6549 group. */
6550 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6551 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2c53b149 6552 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6553 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6554 op = vect_get_store_rhs (first_stmt);
4b5caab7 6555 }
ebfd146a 6556 else
4b5caab7
IR
6557 /* VEC_NUM is the number of vect stmts to be created for this
6558 group. */
ebfd146a 6559 vec_num = group_size;
44fc7854
BE
6560
6561 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6562 }
b8698a0f 6563 else
7cfb4d93 6564 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6565
73fbfcad 6566 if (dump_enabled_p ())
78c60e3d 6567 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6568 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6569
2de001ee
RS
6570 if (memory_access_type == VMAT_ELEMENTWISE
6571 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6572 {
6573 gimple_stmt_iterator incr_gsi;
6574 bool insert_after;
355fe088 6575 gimple *incr;
f2e2a985
MM
6576 tree offvar;
6577 tree ivstep;
6578 tree running_off;
f2e2a985
MM
6579 tree stride_base, stride_step, alias_off;
6580 tree vec_oprnd;
f502d50e 6581 unsigned int g;
4d694b27
RS
6582 /* Checked by get_load_store_type. */
6583 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6584
7cfb4d93 6585 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6586 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6587
6588 stride_base
6589 = fold_build_pointer_plus
b210f45f 6590 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6591 size_binop (PLUS_EXPR,
b210f45f 6592 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6593 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6594 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6595
6596 /* For a store with loop-invariant (but other than power-of-2)
6597 stride (i.e. not a grouped access) like so:
6598
6599 for (i = 0; i < n; i += stride)
6600 array[i] = ...;
6601
6602 we generate a new induction variable and new stores from
6603 the components of the (vectorized) rhs:
6604
6605 for (j = 0; ; j += VF*stride)
6606 vectemp = ...;
6607 tmp1 = vectemp[0];
6608 array[j] = tmp1;
6609 tmp2 = vectemp[1];
6610 array[j + stride] = tmp2;
6611 ...
6612 */
6613
4d694b27 6614 unsigned nstores = const_nunits;
b17dc4d4 6615 unsigned lnel = 1;
cee62fee 6616 tree ltype = elem_type;
04199738 6617 tree lvectype = vectype;
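      /* Emit NSTORES stores of LNEL elements (of type LTYPE) per vector.
	 For SLP the code below tries to store whole sub-vectors at once
	 when the group size divides the number of vector elements, e.g. a
	 group of two doubles taken from a V4DF is stored as two V2DF
	 pieces.  */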
cee62fee
MM
6618 if (slp)
6619 {
4d694b27
RS
6620 if (group_size < const_nunits
6621 && const_nunits % group_size == 0)
b17dc4d4 6622 {
4d694b27 6623 nstores = const_nunits / group_size;
b17dc4d4
RB
6624 lnel = group_size;
6625 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6626 lvectype = vectype;
6627
 6628		  /* First check whether the vec_extract optab supports extraction
 6629		     of vector elts directly; if not, fall back below.  */
b397965c 6630 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6631 machine_mode vmode;
6632 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6633 || !VECTOR_MODE_P (vmode)
414fef4e 6634 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6635 || (convert_optab_handler (vec_extract_optab,
6636 TYPE_MODE (vectype), vmode)
6637 == CODE_FOR_nothing))
6638 {
6639 /* Try to avoid emitting an extract of vector elements
6640 by performing the extracts using an integer type of the
6641 same size, extracting from a vector of those and then
6642 re-interpreting it as the original vector type if
6643 supported. */
6644 unsigned lsize
6645 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6646 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6647 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6648 /* If we can't construct such a vector fall back to
6649 element extracts from the original vector type and
6650 element size stores. */
4d694b27 6651 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6652 && VECTOR_MODE_P (vmode)
414fef4e 6653 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6654 && (convert_optab_handler (vec_extract_optab,
6655 vmode, elmode)
6656 != CODE_FOR_nothing))
6657 {
4d694b27 6658 nstores = lnunits;
04199738
RB
6659 lnel = group_size;
6660 ltype = build_nonstandard_integer_type (lsize, 1);
6661 lvectype = build_vector_type (ltype, nstores);
6662 }
6663 /* Else fall back to vector extraction anyway.
6664 Fewer stores are more important than avoiding spilling
6665 of the vector we extract from. Compared to the
6666 construction case in vectorizable_load no store-forwarding
6667 issue exists here for reasonable archs. */
6668 }
b17dc4d4 6669 }
4d694b27
RS
6670 else if (group_size >= const_nunits
6671 && group_size % const_nunits == 0)
b17dc4d4
RB
6672 {
6673 nstores = 1;
4d694b27 6674 lnel = const_nunits;
b17dc4d4 6675 ltype = vectype;
04199738 6676 lvectype = vectype;
b17dc4d4 6677 }
cee62fee
MM
6678 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6679 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6680 }
6681
f2e2a985
MM
6682 ivstep = stride_step;
6683 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6684 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6685
6686 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6687
b210f45f
RB
6688 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6689 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6690 create_iv (stride_base, ivstep, NULL,
6691 loop, &incr_gsi, insert_after,
6692 &offvar, NULL);
6693 incr = gsi_stmt (incr_gsi);
310213d4 6694 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985 6695
b210f45f 6696 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6697
6698 prev_stmt_info = NULL;
44fc7854 6699 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6700 next_stmt = first_stmt;
6701 for (g = 0; g < group_size; g++)
f2e2a985 6702 {
f502d50e
MM
6703 running_off = offvar;
6704 if (g)
f2e2a985 6705 {
f502d50e
MM
6706 tree size = TYPE_SIZE_UNIT (ltype);
6707 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6708 size);
f502d50e 6709 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6710 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6711 running_off, pos);
f2e2a985 6712 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6713 running_off = newoff;
f502d50e 6714 }
b17dc4d4
RB
6715 unsigned int group_el = 0;
6716 unsigned HOST_WIDE_INT
6717 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6718 for (j = 0; j < ncopies; j++)
6719 {
c3a8f964 6720	      /* We've set op (from vect_get_store_rhs) and rhs_dt above,
f502d50e
MM
6721 and first_stmt == stmt. */
6722 if (j == 0)
6723 {
6724 if (slp)
6725 {
6726 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6727 slp_node);
f502d50e
MM
6728 vec_oprnd = vec_oprnds[0];
6729 }
6730 else
6731 {
c3a8f964 6732 op = vect_get_store_rhs (next_stmt);
81c40241 6733 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6734 }
6735 }
f2e2a985 6736 else
f502d50e
MM
6737 {
6738 if (slp)
6739 vec_oprnd = vec_oprnds[j];
6740 else
c079cbac 6741 {
894dd753 6742 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411
RS
6743 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6744 vec_oprnd);
c079cbac 6745 }
f502d50e 6746 }
04199738
RB
6747 /* Pun the vector to extract from if necessary. */
6748 if (lvectype != vectype)
6749 {
6750 tree tem = make_ssa_name (lvectype);
6751 gimple *pun
6752 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6753 lvectype, vec_oprnd));
6754 vect_finish_stmt_generation (stmt, pun, gsi);
6755 vec_oprnd = tem;
6756 }
f502d50e
MM
6757 for (i = 0; i < nstores; i++)
6758 {
6759 tree newref, newoff;
355fe088 6760 gimple *incr, *assign;
f502d50e
MM
6761 tree size = TYPE_SIZE (ltype);
6762 /* Extract the i'th component. */
6763 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6764 bitsize_int (i), size);
6765 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6766 size, pos);
6767
6768 elem = force_gimple_operand_gsi (gsi, elem, true,
6769 NULL_TREE, true,
6770 GSI_SAME_STMT);
6771
b17dc4d4
RB
6772 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6773 group_el * elsz);
f502d50e 6774 newref = build2 (MEM_REF, ltype,
b17dc4d4 6775 running_off, this_off);
19986382 6776 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6777
6778 /* And store it to *running_off. */
6779 assign = gimple_build_assign (newref, elem);
6780 vect_finish_stmt_generation (stmt, assign, gsi);
6781
b17dc4d4
RB
6782 group_el += lnel;
6783 if (! slp
6784 || group_el == group_size)
6785 {
6786 newoff = copy_ssa_name (running_off, NULL);
6787 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6788 running_off, stride_step);
6789 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6790
b17dc4d4
RB
6791 running_off = newoff;
6792 group_el = 0;
6793 }
225ce44b
RB
6794 if (g == group_size - 1
6795 && !slp)
f502d50e
MM
6796 {
6797 if (j == 0 && i == 0)
225ce44b
RB
6798 STMT_VINFO_VEC_STMT (stmt_info)
6799 = *vec_stmt = assign;
f502d50e
MM
6800 else
6801 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6802 prev_stmt_info = vinfo_for_stmt (assign);
6803 }
6804 }
f2e2a985 6805 }
2c53b149 6806 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6807 if (slp)
6808 break;
f2e2a985 6809 }
778dd3b6
RB
6810
6811 vec_oprnds.release ();
f2e2a985
MM
6812 return true;
6813 }
6814
8c681247 6815 auto_vec<tree> dr_chain (group_size);
9771b263 6816 oprnds.create (group_size);
ebfd146a 6817
720f5239 6818 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6819 gcc_assert (alignment_support_scheme);
70088b95
RS
6820 vec_loop_masks *loop_masks
6821 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6822 ? &LOOP_VINFO_MASKS (loop_vinfo)
6823 : NULL);
272c6793 6824 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6825 realignment. vect_supportable_dr_alignment always returns either
6826 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6827 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6828 && !mask
70088b95 6829 && !loop_masks)
272c6793
RS
6830 || alignment_support_scheme == dr_aligned
6831 || alignment_support_scheme == dr_unaligned_supported);
6832
62da9e14
RS
6833 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6834 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6835 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6836
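  /* AGGR_TYPE is what the data-ref pointer advances over between copies and
     BUMP the matching increment: a true gather/scatter needs neither, a
     strided access implemented as a gather/scatter advances by single
     elements, store-lanes by an array of VEC_NUM vectors, and everything
     else by one vector.  */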
f307441a
RS
6837 tree bump;
6838 tree vec_offset = NULL_TREE;
6839 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6840 {
6841 aggr_type = NULL_TREE;
6842 bump = NULL_TREE;
6843 }
6844 else if (memory_access_type == VMAT_GATHER_SCATTER)
6845 {
6846 aggr_type = elem_type;
6847 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6848 &bump, &vec_offset);
6849 }
272c6793 6850 else
f307441a
RS
6851 {
6852 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6853 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6854 else
6855 aggr_type = vectype;
6856 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6857 }
ebfd146a 6858
c3a8f964
RS
6859 if (mask)
6860 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6861
ebfd146a
IR
6862 /* In case the vectorization factor (VF) is bigger than the number
6863 of elements that we can fit in a vectype (nunits), we have to generate
 6864      more than one vector stmt, i.e., we need to "unroll" the
b8698a0f 6865 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6866 vect_get_vec_def_for_copy_stmt. */
6867
0d0293ac 6868 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6869
6870 S1: &base + 2 = x2
6871 S2: &base = x0
6872 S3: &base + 1 = x1
6873 S4: &base + 3 = x3
6874
6875 We create vectorized stores starting from base address (the access of the
 6876      first stmt in the chain (S2 in the above example)), when the last store stmt
6877 of the chain (S4) is reached:
6878
6879 VS1: &base = vx2
6880 VS2: &base + vec_size*1 = vx0
6881 VS3: &base + vec_size*2 = vx1
6882 VS4: &base + vec_size*3 = vx3
6883
6884 Then permutation statements are generated:
6885
3fcc1b55
JJ
6886 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6887 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6888 ...
b8698a0f 6889
ebfd146a
IR
6890 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6891 (the order of the data-refs in the output of vect_permute_store_chain
6892 corresponds to the order of scalar stmts in the interleaving chain - see
6893 the documentation of vect_permute_store_chain()).
6894
6895 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6896 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6897 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6898 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6899 */
6900
6901 prev_stmt_info = NULL;
c3a8f964 6902 tree vec_mask = NULL_TREE;
ebfd146a
IR
6903 for (j = 0; j < ncopies; j++)
6904 {
ebfd146a
IR
6905
6906 if (j == 0)
6907 {
6908 if (slp)
6909 {
6910 /* Get vectorized arguments for SLP_NODE. */
d092494c 6911 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6912 NULL, slp_node);
ebfd146a 6913
9771b263 6914 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6915 }
6916 else
6917 {
b8698a0f
L
6918 /* For interleaved stores we collect vectorized defs for all the
6919 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6920 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6921 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6922
2c53b149 6923 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6924 OPRNDS are of size 1. */
b8698a0f 6925 next_stmt = first_stmt;
ebfd146a
IR
6926 for (i = 0; i < group_size; i++)
6927 {
b8698a0f 6928 /* Since gaps are not supported for interleaved stores,
2c53b149 6929 DR_GROUP_SIZE is the exact number of stmts in the chain.
b8698a0f 6930 Therefore, NEXT_STMT can't be NULL_TREE. In case that
2c53b149 6931 there is no interleaving, DR_GROUP_SIZE is 1, and only one
ebfd146a 6932 iteration of the loop will be executed. */
c3a8f964 6933 op = vect_get_store_rhs (next_stmt);
81c40241 6934 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6935 dr_chain.quick_push (vec_oprnd);
6936 oprnds.quick_push (vec_oprnd);
2c53b149 6937 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6938 }
c3a8f964
RS
6939 if (mask)
6940 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6941 mask_vectype);
ebfd146a
IR
6942 }
6943
 6944	      /* We should have caught mismatched types earlier.  */
6945 gcc_assert (useless_type_conversion_p (vectype,
6946 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6947 bool simd_lane_access_p
6948 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6949 if (simd_lane_access_p
6950 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6951 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6952 && integer_zerop (DR_OFFSET (first_dr))
6953 && integer_zerop (DR_INIT (first_dr))
6954 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6955 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6956 {
6957 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6958 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6959 inv_p = false;
74bf76ed 6960 }
f307441a
RS
6961 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6962 {
6963 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6964 &dataref_ptr, &vec_offset);
6965 inv_p = false;
6966 }
74bf76ed
JJ
6967 else
6968 dataref_ptr
6969 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6970 simd_lane_access_p ? loop : NULL,
09dfa495 6971 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
6972 simd_lane_access_p, &inv_p,
6973 NULL_TREE, bump);
a70d6342 6974 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6975 }
b8698a0f 6976 else
ebfd146a 6977 {
b8698a0f
L
6978 /* For interleaved stores we created vectorized defs for all the
6979 defs stored in OPRNDS in the previous iteration (previous copy).
6980 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6981 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6982 next copy.
2c53b149 6983 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6984 OPRNDS are of size 1. */
6985 for (i = 0; i < group_size; i++)
6986 {
9771b263 6987 op = oprnds[i];
894dd753 6988 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411 6989 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
6990 dr_chain[i] = vec_oprnd;
6991 oprnds[i] = vec_oprnd;
ebfd146a 6992 }
c3a8f964 6993 if (mask)
929b4411 6994 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
6995 if (dataref_offset)
6996 dataref_offset
f307441a
RS
6997 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6998 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
6999 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7000 vec_offset);
74bf76ed
JJ
7001 else
7002 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7003 bump);
ebfd146a
IR
7004 }
7005
2de001ee 7006 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7007 {
272c6793 7008 tree vec_array;
267d3070 7009
3ba4ff41 7010 /* Get an array into which we can store the individual vectors. */
272c6793 7011 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7012
7013 /* Invalidate the current contents of VEC_ARRAY. This should
7014 become an RTL clobber too, which prevents the vector registers
7015 from being upward-exposed. */
7016 vect_clobber_variable (stmt, gsi, vec_array);
7017
7018 /* Store the individual vectors into the array. */
272c6793 7019 for (i = 0; i < vec_num; i++)
c2d7ab2a 7020 {
9771b263 7021 vec_oprnd = dr_chain[i];
272c6793 7022 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7023 }
b8698a0f 7024
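	  /* FINAL_MASK combines the loop mask of a fully-masked loop (if
	     any) with the mask operand of a masked store (if any); when it
	     is present an IFN_MASK_STORE_LANES call is emitted instead of a
	     plain IFN_STORE_LANES.  */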
7cfb4d93 7025 tree final_mask = NULL;
70088b95
RS
7026 if (loop_masks)
7027 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7028 vectype, j);
7cfb4d93
RS
7029 if (vec_mask)
7030 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7031 vec_mask, gsi);
7032
7e11fc7f 7033 gcall *call;
7cfb4d93 7034 if (final_mask)
7e11fc7f
RS
7035 {
7036 /* Emit:
7037 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7038 VEC_ARRAY). */
7039 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7040 tree alias_ptr = build_int_cst (ref_type, align);
7041 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7042 dataref_ptr, alias_ptr,
7cfb4d93 7043 final_mask, vec_array);
7e11fc7f
RS
7044 }
7045 else
7046 {
7047 /* Emit:
7048 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7049 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7050 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7051 vec_array);
7052 gimple_call_set_lhs (call, data_ref);
7053 }
a844293d
RS
7054 gimple_call_set_nothrow (call, true);
7055 new_stmt = call;
267d3070 7056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41
RS
7057
7058 /* Record that VEC_ARRAY is now dead. */
7059 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7060 }
7061 else
7062 {
7063 new_stmt = NULL;
0d0293ac 7064 if (grouped_store)
272c6793 7065 {
b6b9227d
JJ
7066 if (j == 0)
7067 result_chain.create (group_size);
272c6793
RS
7068 /* Permute. */
7069 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7070 &result_chain);
7071 }
c2d7ab2a 7072
272c6793
RS
7073 next_stmt = first_stmt;
7074 for (i = 0; i < vec_num; i++)
7075 {
644ffefd 7076 unsigned align, misalign;
272c6793 7077
7cfb4d93 7078 tree final_mask = NULL_TREE;
70088b95
RS
7079 if (loop_masks)
7080 final_mask = vect_get_loop_mask (gsi, loop_masks,
7081 vec_num * ncopies,
7cfb4d93
RS
7082 vectype, vec_num * j + i);
7083 if (vec_mask)
7084 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7085 vec_mask, gsi);
7086
f307441a
RS
7087 if (memory_access_type == VMAT_GATHER_SCATTER)
7088 {
7089 tree scale = size_int (gs_info.scale);
7090 gcall *call;
70088b95 7091 if (loop_masks)
f307441a
RS
7092 call = gimple_build_call_internal
7093 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7094 scale, vec_oprnd, final_mask);
7095 else
7096 call = gimple_build_call_internal
7097 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7098 scale, vec_oprnd);
7099 gimple_call_set_nothrow (call, true);
7100 new_stmt = call;
7101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7102 break;
7103 }
7104
272c6793
RS
7105 if (i > 0)
7106 /* Bump the vector pointer. */
7107 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7108 stmt, bump);
272c6793
RS
7109
7110 if (slp)
9771b263 7111 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7112 else if (grouped_store)
7113 /* For grouped stores vectorized defs are interleaved in
272c6793 7114 vect_permute_store_chain(). */
9771b263 7115 vec_oprnd = result_chain[i];
272c6793 7116
f702e7d4 7117 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7118 if (aligned_access_p (first_dr))
644ffefd 7119 misalign = 0;
272c6793
RS
7120 else if (DR_MISALIGNMENT (first_dr) == -1)
7121 {
25f68d90 7122 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7123 misalign = 0;
272c6793
RS
7124 }
7125 else
c3a8f964 7126 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7127 if (dataref_offset == NULL_TREE
7128 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7129 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7130 misalign);
c2d7ab2a 7131
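	  /* A contiguous access with a negative step stores the elements in
	     reverse order, e.g. the vector {a,b,c,d} is written back as
	     {d,c,b,a} via the VEC_PERM_EXPR generated below.  */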
62da9e14 7132 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7133 {
7134 tree perm_mask = perm_mask_for_reverse (vectype);
7135 tree perm_dest
c3a8f964 7136 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7137 vectype);
b731b390 7138 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7139
7140 /* Generate the permute statement. */
355fe088 7141 gimple *perm_stmt
0d0e4a03
JJ
7142 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7143 vec_oprnd, perm_mask);
09dfa495
BM
7144 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7145
7146 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7147 vec_oprnd = new_temp;
7148 }
7149
272c6793 7150 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7151 if (final_mask)
c3a8f964
RS
7152 {
7153 align = least_bit_hwi (misalign | align);
7154 tree ptr = build_int_cst (ref_type, align);
7155 gcall *call
7156 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7157 dataref_ptr, ptr,
7cfb4d93 7158 final_mask, vec_oprnd);
c3a8f964
RS
7159 gimple_call_set_nothrow (call, true);
7160 new_stmt = call;
7161 }
7162 else
7163 {
7164 data_ref = fold_build2 (MEM_REF, vectype,
7165 dataref_ptr,
7166 dataref_offset
7167 ? dataref_offset
7168 : build_int_cst (ref_type, 0));
7169 if (aligned_access_p (first_dr))
7170 ;
7171 else if (DR_MISALIGNMENT (first_dr) == -1)
7172 TREE_TYPE (data_ref)
7173 = build_aligned_type (TREE_TYPE (data_ref),
7174 align * BITS_PER_UNIT);
7175 else
7176 TREE_TYPE (data_ref)
7177 = build_aligned_type (TREE_TYPE (data_ref),
7178 TYPE_ALIGN (elem_type));
19986382 7179 vect_copy_ref_info (data_ref, DR_REF (first_dr));
c3a8f964
RS
7180 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7181 }
272c6793 7182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
7183
7184 if (slp)
7185 continue;
7186
2c53b149 7187 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
7188 if (!next_stmt)
7189 break;
7190 }
ebfd146a 7191 }
1da0876c
RS
7192 if (!slp)
7193 {
7194 if (j == 0)
7195 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7196 else
7197 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7198 prev_stmt_info = vinfo_for_stmt (new_stmt);
7199 }
ebfd146a
IR
7200 }
7201
9771b263
DN
7202 oprnds.release ();
7203 result_chain.release ();
7204 vec_oprnds.release ();
ebfd146a
IR
7205
7206 return true;
7207}
7208
557be5a8
AL
7209/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7210 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7211 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7212 vect_gen_perm_mask_checked. */
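/* For example, for a four-element VECTYPE and SEL = {1,0,3,2} the result is
   the VECTOR_CST {1, 0, 3, 2} with ssizetype elements.  */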
a1e53f3f 7213
3fcc1b55 7214tree
4aae3cb3 7215vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7216{
b00cb3bf 7217 tree mask_type;
a1e53f3f 7218
0ecc2b7d
RS
7219 poly_uint64 nunits = sel.length ();
7220 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7221
7222 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7223 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7224}
7225
7ac7e286 7226/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7227 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7228
7229tree
4aae3cb3 7230vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7231{
7ac7e286 7232 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7233 return vect_gen_perm_mask_any (vectype, sel);
7234}
7235
aec7ae7d
JJ
 7236/* Given vector variables X and Y that were generated for the scalar
7237 STMT, generate instructions to permute the vector elements of X and Y
7238 using permutation mask MASK_VEC, insert them at *GSI and return the
7239 permuted vector variable. */
a1e53f3f
L
7240
7241static tree
355fe088 7242permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7243 gimple_stmt_iterator *gsi)
a1e53f3f
L
7244{
7245 tree vectype = TREE_TYPE (x);
aec7ae7d 7246 tree perm_dest, data_ref;
355fe088 7247 gimple *perm_stmt;
a1e53f3f 7248
7ad429a4
RS
7249 tree scalar_dest = gimple_get_lhs (stmt);
7250 if (TREE_CODE (scalar_dest) == SSA_NAME)
7251 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7252 else
7253 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7254 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7255
7256 /* Generate the permute statement. */
0d0e4a03 7257 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7258 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7259
7260 return data_ref;
7261}
7262
6b916b36
RB
7263/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 7264   inserting them on the loop's preheader edge.  Returns true if we
7265 were successful in doing so (and thus STMT can be moved then),
7266 otherwise returns false. */
7267
7268static bool
355fe088 7269hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7270{
7271 ssa_op_iter i;
7272 tree op;
7273 bool any = false;
7274
7275 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7276 {
355fe088 7277 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7278 if (!gimple_nop_p (def_stmt)
7279 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7280 {
7281 /* Make sure we don't need to recurse. While we could do
 7282	     so in simple cases, when there are more complex use webs
7283 we don't have an easy way to preserve stmt order to fulfil
7284 dependencies within them. */
7285 tree op2;
7286 ssa_op_iter i2;
d1417442
JJ
7287 if (gimple_code (def_stmt) == GIMPLE_PHI)
7288 return false;
6b916b36
RB
7289 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7290 {
355fe088 7291 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7292 if (!gimple_nop_p (def_stmt2)
7293 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7294 return false;
7295 }
7296 any = true;
7297 }
7298 }
7299
7300 if (!any)
7301 return true;
7302
7303 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7304 {
355fe088 7305 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7306 if (!gimple_nop_p (def_stmt)
7307 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7308 {
7309 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7310 gsi_remove (&gsi, false);
7311 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7312 }
7313 }
7314
7315 return true;
7316}
7317
ebfd146a
IR
7318/* vectorizable_load.
7319
b8698a0f
L
 7320   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7321 can be vectorized.
7322 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 7323   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7324 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7325
7326static bool
355fe088 7327vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2
RB
7328 slp_tree slp_node, slp_instance slp_node_instance,
7329 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7330{
7331 tree scalar_dest;
7332 tree vec_dest = NULL;
7333 tree data_ref = NULL;
7334 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7335 stmt_vec_info prev_stmt_info;
ebfd146a 7336 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7337 struct loop *loop = NULL;
ebfd146a 7338 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7339 bool nested_in_vect_loop = false;
c716e67f 7340 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7341 tree elem_type;
ebfd146a 7342 tree new_temp;
ef4bddc2 7343 machine_mode mode;
355fe088 7344 gimple *new_stmt = NULL;
ebfd146a
IR
7345 tree dummy;
7346 enum dr_alignment_support alignment_support_scheme;
7347 tree dataref_ptr = NULL_TREE;
74bf76ed 7348 tree dataref_offset = NULL_TREE;
355fe088 7349 gimple *ptr_incr = NULL;
ebfd146a 7350 int ncopies;
4d694b27
RS
7351 int i, j;
7352 unsigned int group_size;
7353 poly_uint64 group_gap_adj;
ebfd146a
IR
7354 tree msq = NULL_TREE, lsq;
7355 tree offset = NULL_TREE;
356bbc4c 7356 tree byte_offset = NULL_TREE;
ebfd146a 7357 tree realignment_token = NULL_TREE;
538dd0b7 7358 gphi *phi = NULL;
6e1aa848 7359 vec<tree> dr_chain = vNULL;
0d0293ac 7360 bool grouped_load = false;
355fe088 7361 gimple *first_stmt;
4f0a0218 7362 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
7363 bool inv_p;
7364 bool compute_in_loop = false;
7365 struct loop *at_loop;
7366 int vec_num;
7367 bool slp = (slp_node != NULL);
7368 bool slp_perm = false;
a70d6342 7369 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7370 poly_uint64 vf;
272c6793 7371 tree aggr_type;
134c85ca 7372 gather_scatter_info gs_info;
310213d4 7373 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7374 tree ref_type;
929b4411 7375 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7376
465c8c19
JJ
7377 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7378 return false;
7379
66c16fd9
RB
7380 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7381 && ! vec_stmt)
465c8c19
JJ
7382 return false;
7383
c3a8f964
RS
7384 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7385 if (is_gimple_assign (stmt))
7386 {
7387 scalar_dest = gimple_assign_lhs (stmt);
7388 if (TREE_CODE (scalar_dest) != SSA_NAME)
7389 return false;
465c8c19 7390
c3a8f964
RS
7391 tree_code code = gimple_assign_rhs_code (stmt);
7392 if (code != ARRAY_REF
7393 && code != BIT_FIELD_REF
7394 && code != INDIRECT_REF
7395 && code != COMPONENT_REF
7396 && code != IMAGPART_EXPR
7397 && code != REALPART_EXPR
7398 && code != MEM_REF
7399 && TREE_CODE_CLASS (code) != tcc_declaration)
7400 return false;
7401 }
7402 else
7403 {
7404 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7405 if (!call || !gimple_call_internal_p (call))
7406 return false;
7407
7408 internal_fn ifn = gimple_call_internal_fn (call);
7409 if (!internal_load_fn_p (ifn))
c3a8f964 7410 return false;
465c8c19 7411
c3a8f964
RS
7412 scalar_dest = gimple_call_lhs (call);
7413 if (!scalar_dest)
7414 return false;
7415
7416 if (slp_node != NULL)
7417 {
7418 if (dump_enabled_p ())
7419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7420 "SLP of masked loads not supported.\n");
7421 return false;
7422 }
7423
bfaa08b7
RS
7424 int mask_index = internal_fn_mask_index (ifn);
7425 if (mask_index >= 0)
7426 {
7427 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7428 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7429 &mask_vectype))
bfaa08b7
RS
7430 return false;
7431 }
c3a8f964 7432 }
465c8c19
JJ
7433
7434 if (!STMT_VINFO_DATA_REF (stmt_info))
7435 return false;
7436
7437 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7438 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7439
a70d6342
IR
7440 if (loop_vinfo)
7441 {
7442 loop = LOOP_VINFO_LOOP (loop_vinfo);
7443 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7444 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7445 }
7446 else
3533e503 7447 vf = 1;
ebfd146a
IR
7448
7449 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7450 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7451 case of SLP. */
fce57248 7452 if (slp)
ebfd146a
IR
7453 ncopies = 1;
7454 else
e8f142e2 7455 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7456
7457 gcc_assert (ncopies >= 1);
7458
7459 /* FORNOW. This restriction should be relaxed. */
7460 if (nested_in_vect_loop && ncopies > 1)
7461 {
73fbfcad 7462 if (dump_enabled_p ())
78c60e3d 7463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7464 "multiple types in nested loop.\n");
ebfd146a
IR
7465 return false;
7466 }
7467
f2556b68
RB
7468 /* Invalidate assumptions made by dependence analysis when vectorization
7469 on the unrolled body effectively re-orders stmts. */
7470 if (ncopies > 1
7471 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7472 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7473 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7474 {
7475 if (dump_enabled_p ())
7476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7477 "cannot perform implicit CSE when unrolling "
7478 "with negative dependence distance\n");
7479 return false;
7480 }
7481
7b7b1813 7482 elem_type = TREE_TYPE (vectype);
947131ba 7483 mode = TYPE_MODE (vectype);
ebfd146a
IR
7484
 7485  /* FORNOW.  In some cases we can vectorize even if the data type is not
 7486     supported (e.g. data copies).  */
947131ba 7487 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7488 {
73fbfcad 7489 if (dump_enabled_p ())
78c60e3d 7490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7491 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7492 return false;
7493 }
7494
ebfd146a 7495 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7496 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7497 {
0d0293ac 7498 grouped_load = true;
ebfd146a 7499 /* FORNOW */
2de001ee
RS
7500 gcc_assert (!nested_in_vect_loop);
7501 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7502
2c53b149
RB
7503 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7504 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7505
b1af7da6
RB
7506 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7507 slp_perm = true;
7508
f2556b68
RB
7509 /* Invalidate assumptions made by dependence analysis when vectorization
7510 on the unrolled body effectively re-orders stmts. */
7511 if (!PURE_SLP_STMT (stmt_info)
7512 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7513 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7514 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7515 {
7516 if (dump_enabled_p ())
7517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7518 "cannot perform implicit CSE when performing "
7519 "group loads with negative dependence distance\n");
7520 return false;
7521 }
96bb56b2
RB
7522
 7523      /* Similarly, when the stmt is a load that is both part of an SLP
 7524	 instance and a loop vectorized stmt via the same-dr mechanism,
 7525	 we have to give up.  */
2c53b149 7526 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2
RB
7527 && (STMT_SLP_TYPE (stmt_info)
7528 != STMT_SLP_TYPE (vinfo_for_stmt
2c53b149 7529 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
96bb56b2
RB
7530 {
7531 if (dump_enabled_p ())
7532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7533 "conflicting SLP types for CSEd load\n");
7534 return false;
7535 }
ebfd146a 7536 }
7cfb4d93
RS
7537 else
7538 group_size = 1;
ebfd146a 7539
2de001ee 7540 vect_memory_access_type memory_access_type;
7e11fc7f 7541 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7542 &memory_access_type, &gs_info))
7543 return false;
a1e53f3f 7544
c3a8f964
RS
7545 if (mask)
7546 {
7547 if (memory_access_type == VMAT_CONTIGUOUS)
7548 {
7e11fc7f
RS
7549 machine_mode vec_mode = TYPE_MODE (vectype);
7550 if (!VECTOR_MODE_P (vec_mode)
7551 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7552 TYPE_MODE (mask_vectype), true))
7553 return false;
7554 }
bfaa08b7 7555 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7556 {
7557 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7558 tree masktype
7559 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7560 if (TREE_CODE (masktype) == INTEGER_TYPE)
7561 {
7562 if (dump_enabled_p ())
7563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7564 "masked gather with integer mask not"
7565 " supported.");
7566 return false;
7567 }
7568 }
bfaa08b7
RS
7569 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7570 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7571 {
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7574 "unsupported access type for masked load.\n");
7575 return false;
7576 }
7577 }
7578
ebfd146a
IR
7579 if (!vec_stmt) /* transformation not required. */
7580 {
2de001ee
RS
7581 if (!slp)
7582 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7583
7584 if (loop_vinfo
7585 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7586 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7587 memory_access_type, &gs_info);
7cfb4d93 7588
ebfd146a 7589 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7590 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7591 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7592 return true;
7593 }
7594
2de001ee
RS
7595 if (!slp)
7596 gcc_assert (memory_access_type
7597 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7598
73fbfcad 7599 if (dump_enabled_p ())
78c60e3d 7600 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7601 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7602
67b8dbac 7603 /* Transform. */
ebfd146a 7604
f702e7d4 7605 ensure_base_align (dr);
c716e67f 7606
bfaa08b7 7607 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7608 {
929b4411
RS
7609 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7610 mask_dt);
aec7ae7d
JJ
7611 return true;
7612 }
2de001ee
RS
7613
7614 if (memory_access_type == VMAT_ELEMENTWISE
7615 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7616 {
7617 gimple_stmt_iterator incr_gsi;
7618 bool insert_after;
355fe088 7619 gimple *incr;
7d75abc8 7620 tree offvar;
7d75abc8
MM
7621 tree ivstep;
7622 tree running_off;
9771b263 7623 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7624 tree stride_base, stride_step, alias_off;
4d694b27
RS
7625 /* Checked by get_load_store_type. */
7626 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7627 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7628
7cfb4d93 7629 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7630 gcc_assert (!nested_in_vect_loop);
7d75abc8 7631
b210f45f 7632 if (grouped_load)
44fc7854 7633 {
2c53b149 7634 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7635 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7636 }
ab313a8c 7637 else
44fc7854
BE
7638 {
7639 first_stmt = stmt;
7640 first_dr = dr;
b210f45f
RB
7641 }
7642 if (slp && grouped_load)
7643 {
2c53b149 7644 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
b210f45f
RB
7645 ref_type = get_group_alias_ptr_type (first_stmt);
7646 }
7647 else
7648 {
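	  /* A grouped load handled elementwise without SLP loads only the
	     element this stmt accesses; CST_OFFSET is its constant byte
	     offset from the first element of the group.  */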
7649 if (grouped_load)
7650 cst_offset
7651 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7652 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7653 group_size = 1;
b210f45f 7654 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7655 }
ab313a8c 7656
14ac6aa2
RB
7657 stride_base
7658 = fold_build_pointer_plus
ab313a8c 7659 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7660 size_binop (PLUS_EXPR,
ab313a8c
RB
7661 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7662 convert_to_ptrofftype (DR_INIT (first_dr))));
7663 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7664
7665 /* For a load with loop-invariant (but other than power-of-2)
7666 stride (i.e. not a grouped access) like so:
7667
7668 for (i = 0; i < n; i += stride)
7669 ... = array[i];
7670
7671 we generate a new induction variable and new accesses to
7672 form a new vector (or vectors, depending on ncopies):
7673
7674 for (j = 0; ; j += VF*stride)
7675 tmp1 = array[j];
7676 tmp2 = array[j + stride];
7677 ...
7678 vectemp = {tmp1, tmp2, ...}
7679 */
7680
ab313a8c
RB
7681 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7682 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7683
7684 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7685
b210f45f
RB
7686 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7687 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7688 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7689 loop, &incr_gsi, insert_after,
7690 &offvar, NULL);
7691 incr = gsi_stmt (incr_gsi);
310213d4 7692 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7693
b210f45f 7694 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7695
7696 prev_stmt_info = NULL;
7697 running_off = offvar;
44fc7854 7698 alias_off = build_int_cst (ref_type, 0);
4d694b27 7699 int nloads = const_nunits;
e09b4c37 7700 int lnel = 1;
7b5fc413 7701 tree ltype = TREE_TYPE (vectype);
ea60dd34 7702 tree lvectype = vectype;
b266b968 7703 auto_vec<tree> dr_chain;
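      /* Emit NLOADS loads of LNEL elements each (of type LTYPE) and build a
	 vector of type LVECTYPE from the results.  As for strided stores,
	 whole sub-vectors or same-sized integers are preferred when the
	 target can construct LVECTYPE from them.  */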
2de001ee 7704 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7705 {
4d694b27 7706 if (group_size < const_nunits)
e09b4c37 7707 {
ff03930a
JJ
7708 /* First check if vec_init optab supports construction from
7709 vector elts directly. */
b397965c 7710 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7711 machine_mode vmode;
7712 if (mode_for_vector (elmode, group_size).exists (&vmode)
7713 && VECTOR_MODE_P (vmode)
414fef4e 7714 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7715 && (convert_optab_handler (vec_init_optab,
7716 TYPE_MODE (vectype), vmode)
7717 != CODE_FOR_nothing))
ea60dd34 7718 {
4d694b27 7719 nloads = const_nunits / group_size;
ea60dd34 7720 lnel = group_size;
ff03930a
JJ
7721 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7722 }
7723 else
7724 {
7725 /* Otherwise avoid emitting a constructor of vector elements
7726 by performing the loads using an integer type of the same
7727 size, constructing a vector of those and then
7728 re-interpreting it as the original vector type.
7729 This avoids a huge runtime penalty due to the general
7730 inability to perform store forwarding from smaller stores
7731 to a larger load. */
7732 unsigned lsize
7733 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7734 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7735 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7736 /* If we can't construct such a vector fall back to
7737 element loads of the original vector type. */
4d694b27 7738 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7739 && VECTOR_MODE_P (vmode)
414fef4e 7740 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7741 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7742 != CODE_FOR_nothing))
7743 {
4d694b27 7744 nloads = lnunits;
ff03930a
JJ
7745 lnel = group_size;
7746 ltype = build_nonstandard_integer_type (lsize, 1);
7747 lvectype = build_vector_type (ltype, nloads);
7748 }
ea60dd34 7749 }
e09b4c37 7750 }
2de001ee 7751 else
e09b4c37 7752 {
ea60dd34 7753 nloads = 1;
4d694b27 7754 lnel = const_nunits;
e09b4c37 7755 ltype = vectype;
e09b4c37 7756 }
2de001ee
RS
7757 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7758 }
bb4e4747
BC
 7759      /* For a single-element vectype (vector(1) scalar_type), load it as a whole.  */
7760 else if (nloads == 1)
7761 ltype = vectype;
7762
2de001ee
RS
7763 if (slp)
7764 {
66c16fd9
RB
7765 /* For SLP permutation support we need to load the whole group,
7766 not only the number of vector stmts the permutation result
7767 fits in. */
b266b968 7768 if (slp_perm)
66c16fd9 7769 {
d9f21f6a
RS
7770 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7771 variable VF. */
7772 unsigned int const_vf = vf.to_constant ();
4d694b27 7773 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7774 dr_chain.create (ncopies);
7775 }
7776 else
7777 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7778 }
4d694b27 7779 unsigned int group_el = 0;
e09b4c37
RB
7780 unsigned HOST_WIDE_INT
7781 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7782 for (j = 0; j < ncopies; j++)
7783 {
7b5fc413 7784 if (nloads > 1)
e09b4c37
RB
7785 vec_alloc (v, nloads);
7786 for (i = 0; i < nloads; i++)
7b5fc413 7787 {
e09b4c37 7788 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7789 group_el * elsz + cst_offset);
19986382
RB
7790 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7791 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7792 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7794 if (nloads > 1)
7795 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7796 gimple_assign_lhs (new_stmt));
7797
7798 group_el += lnel;
7799 if (! slp
7800 || group_el == group_size)
7b5fc413 7801 {
e09b4c37
RB
7802 tree newoff = copy_ssa_name (running_off);
7803 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7804 running_off, stride_step);
7b5fc413
RB
7805 vect_finish_stmt_generation (stmt, incr, gsi);
7806
7807 running_off = newoff;
e09b4c37 7808 group_el = 0;
7b5fc413 7809 }
7b5fc413 7810 }
e09b4c37 7811 if (nloads > 1)
7d75abc8 7812 {
ea60dd34
RB
7813 tree vec_inv = build_constructor (lvectype, v);
7814 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7815 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7816 if (lvectype != vectype)
7817 {
7818 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7819 VIEW_CONVERT_EXPR,
7820 build1 (VIEW_CONVERT_EXPR,
7821 vectype, new_temp));
7822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7823 }
7d75abc8
MM
7824 }
7825
7b5fc413 7826 if (slp)
b266b968 7827 {
b266b968
RB
7828 if (slp_perm)
7829 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7830 else
7831 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7832 }
7d75abc8 7833 else
225ce44b
RB
7834 {
7835 if (j == 0)
7836 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7837 else
7838 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7839 prev_stmt_info = vinfo_for_stmt (new_stmt);
7840 }
7d75abc8 7841 }
b266b968 7842 if (slp_perm)
29afecdf
RB
7843 {
7844 unsigned n_perms;
7845 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7846 slp_node_instance, false, &n_perms);
7847 }
7d75abc8
MM
7848 return true;
7849 }
aec7ae7d 7850
b5ec4de7
RS
7851 if (memory_access_type == VMAT_GATHER_SCATTER
7852 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7853 grouped_load = false;
7854
0d0293ac 7855 if (grouped_load)
ebfd146a 7856 {
2c53b149
RB
7857 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7858 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7859 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7860 without permutation. */
7861 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7862 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7863 /* For BB vectorization always use the first stmt to base
7864 the data ref pointer on. */
7865 if (bb_vinfo)
7866 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7867
ebfd146a 7868 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7869 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7870 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7871 ??? But we can only do so if there is exactly one
7872 as we have no way to get at the rest. Leave the CSE
7873 opportunity alone.
7874 ??? With the group load eventually participating
7875 in multiple different permutations (having multiple
7876 slp nodes which refer to the same group) the CSE
7877 would even generate wrong code. See PR56270. */
7878 && !slp)
ebfd146a
IR
7879 {
7880 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7881 return true;
7882 }
7883 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7884 group_gap_adj = 0;
ebfd146a
IR
7885
7886 /* VEC_NUM is the number of vect stmts to be created for this group. */
7887 if (slp)
7888 {
0d0293ac 7889 grouped_load = false;
91ff1504
RB
7890 /* For SLP permutation support we need to load the whole group,
7891 not only the number of vector stmts the permutation result
7892 fits in. */
7893 if (slp_perm)
b267968e 7894 {
d9f21f6a
RS
7895 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7896 variable VF. */
7897 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7898 unsigned int const_nunits = nunits.to_constant ();
7899 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7900 group_gap_adj = vf * group_size - nunits * vec_num;
7901 }
91ff1504 7902 else
b267968e
RB
7903 {
7904 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7905 group_gap_adj
7906 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7907 }
a70d6342 7908 }
ebfd146a 7909 else
9b999e8c 7910 vec_num = group_size;
44fc7854
BE
7911
7912 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7913 }
7914 else
7915 {
7916 first_stmt = stmt;
7917 first_dr = dr;
7918 group_size = vec_num = 1;
9b999e8c 7919 group_gap_adj = 0;
44fc7854 7920 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7921 }
7922
720f5239 7923 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7924 gcc_assert (alignment_support_scheme);
70088b95
RS
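 /* In a fully-masked loop every load emitted below is a masked load;
    LOOP_MASKS provides the mask to apply to each vector copy.  */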
7925 vec_loop_masks *loop_masks
7926 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7927 ? &LOOP_VINFO_MASKS (loop_vinfo)
7928 : NULL);
7cfb4d93
RS
7929 /* Targets with load-lane instructions must not require explicit
7930 realignment. vect_supportable_dr_alignment always returns either
7931 dr_aligned or dr_unaligned_supported for masked operations. */
7932 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7933 && !mask
70088b95 7934 && !loop_masks)
272c6793
RS
7935 || alignment_support_scheme == dr_aligned
7936 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7937
7938 /* In case the vectorization factor (VF) is bigger than the number
7939 of elements that we can fit in a vectype (nunits), we have to generate
7940 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7941 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7942 from one copy of the vector stmt to the next, in the field
ff802fa1 7943 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7944 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7945 stmts that use the defs of the current stmt. The example below
7946 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7947 need to create 4 vectorized stmts):
ebfd146a
IR
7948
7949 before vectorization:
7950 RELATED_STMT VEC_STMT
7951 S1: x = memref - -
7952 S2: z = x + 1 - -
7953
7954 step 1: vectorize stmt S1:
7955 We first create the vector stmt VS1_0, and, as usual, record a
7956 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7957 Next, we create the vector stmt VS1_1, and record a pointer to
7958 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7959 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7960 stmts and pointers:
7961 RELATED_STMT VEC_STMT
7962 VS1_0: vx0 = memref0 VS1_1 -
7963 VS1_1: vx1 = memref1 VS1_2 -
7964 VS1_2: vx2 = memref2 VS1_3 -
7965 VS1_3: vx3 = memref3 - -
7966 S1: x = load - VS1_0
7967 S2: z = x + 1 - -
7968
b8698a0f
L
7969 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7970 information we recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
7971 stmt S2. */
7972
0d0293ac 7973 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7974
7975 S1: x2 = &base + 2
7976 S2: x0 = &base
7977 S3: x1 = &base + 1
7978 S4: x3 = &base + 3
7979
b8698a0f 7980 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7981 starting from the access of the first stmt of the chain:
7982
7983 VS1: vx0 = &base
7984 VS2: vx1 = &base + vec_size*1
7985 VS3: vx3 = &base + vec_size*2
7986 VS4: vx4 = &base + vec_size*3
7987
7988 Then permutation statements are generated:
7989
e2c83630
RH
7990 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7991 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7992 ...
7993
7994 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7995 (the order of the data-refs in the output of vect_permute_load_chain
7996 corresponds to the order of scalar stmts in the interleaving chain - see
7997 the documentation of vect_permute_load_chain()).
7998 The generation of permutation stmts and recording them in
0d0293ac 7999 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8000
b8698a0f 8001 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8002 permutation stmts above are created for every copy. The result vector
8003 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8004 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
8005
8006 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8007 on a target that supports unaligned accesses (dr_unaligned_supported)
8008 we generate the following code:
8009 p = initial_addr;
8010 indx = 0;
8011 loop {
8012 p = p + indx * vectype_size;
8013 vec_dest = *(p);
8014 indx = indx + 1;
8015 }
8016
8017 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8018 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8019 then generate the following code, in which the data in each iteration is
8020 obtained by two vector loads, one from the previous iteration, and one
8021 from the current iteration:
8022 p1 = initial_addr;
8023 msq_init = *(floor(p1))
8024 p2 = initial_addr + VS - 1;
8025 realignment_token = call target_builtin;
8026 indx = 0;
8027 loop {
8028 p2 = p2 + indx * vectype_size
8029 lsq = *(floor(p2))
8030 vec_dest = realign_load (msq, lsq, realignment_token)
8031 indx = indx + 1;
8032 msq = lsq;
8033 } */
8034
8035 /* If the misalignment remains the same throughout the execution of the
8036 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8037 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8038 This can only occur when vectorizing memory accesses in the inner-loop
8039 nested within an outer-loop that is being vectorized. */
8040
d1e4b493 8041 if (nested_in_vect_loop
cf098191
RS
8042 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8043 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8044 {
8045 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8046 compute_in_loop = true;
8047 }
8048
8049 if ((alignment_support_scheme == dr_explicit_realign_optimized
8050 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8051 && !compute_in_loop)
ebfd146a
IR
8052 {
8053 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8054 alignment_support_scheme, NULL_TREE,
8055 &at_loop);
8056 if (alignment_support_scheme == dr_explicit_realign_optimized)
8057 {
538dd0b7 8058 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8059 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8060 size_one_node);
ebfd146a
IR
8061 }
8062 }
8063 else
8064 at_loop = loop;
8065
62da9e14 8066 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8067 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8068
ab2fc782
RS
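 /* Decide what the data-ref pointer steps over (AGGR_TYPE) and by how
    much (BUMP): a native gather/scatter needs neither, the strided
    gather/scatter path takes its step from vect_get_strided_load_store_ops,
    load-lanes steps over an array of VEC_NUM * NUNITS elements, and all
    other cases step by one vector.  */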
8069 tree bump;
8070 tree vec_offset = NULL_TREE;
8071 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8072 {
8073 aggr_type = NULL_TREE;
8074 bump = NULL_TREE;
8075 }
8076 else if (memory_access_type == VMAT_GATHER_SCATTER)
8077 {
8078 aggr_type = elem_type;
8079 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8080 &bump, &vec_offset);
8081 }
272c6793 8082 else
ab2fc782
RS
8083 {
8084 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8085 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8086 else
8087 aggr_type = vectype;
8088 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8089 }
272c6793 8090
c3a8f964 8091 tree vec_mask = NULL_TREE;
ebfd146a 8092 prev_stmt_info = NULL;
4d694b27 8093 poly_uint64 group_elt = 0;
ebfd146a 8094 for (j = 0; j < ncopies; j++)
b8698a0f 8095 {
272c6793 8096 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8097 if (j == 0)
74bf76ed
JJ
8098 {
8099 bool simd_lane_access_p
8100 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8101 if (simd_lane_access_p
8102 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8103 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8104 && integer_zerop (DR_OFFSET (first_dr))
8105 && integer_zerop (DR_INIT (first_dr))
8106 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8107 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8108 && (alignment_support_scheme == dr_aligned
8109 || alignment_support_scheme == dr_unaligned_supported))
8110 {
8111 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8112 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8113 inv_p = false;
74bf76ed 8114 }
4f0a0218
RB
8115 else if (first_stmt_for_drptr
8116 && first_stmt != first_stmt_for_drptr)
8117 {
8118 dataref_ptr
8119 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8120 at_loop, offset, &dummy, gsi,
8121 &ptr_incr, simd_lane_access_p,
ab2fc782 8122 &inv_p, byte_offset, bump);
4f0a0218
RB
8123 /* Adjust the pointer by the difference to first_stmt. */
8124 data_reference_p ptrdr
8125 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8126 tree diff = fold_convert (sizetype,
8127 size_binop (MINUS_EXPR,
8128 DR_INIT (first_dr),
8129 DR_INIT (ptrdr)));
8130 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8131 stmt, diff);
8132 }
bfaa08b7
RS
8133 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8134 {
8135 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8136 &dataref_ptr, &vec_offset);
8137 inv_p = false;
8138 }
74bf76ed
JJ
8139 else
8140 dataref_ptr
8141 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8142 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8143 simd_lane_access_p, &inv_p,
ab2fc782 8144 byte_offset, bump);
c3a8f964
RS
8145 if (mask)
8146 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8147 mask_vectype);
74bf76ed 8148 }
ebfd146a 8149 else
c3a8f964
RS
8150 {
8151 if (dataref_offset)
8152 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8153 bump);
bfaa08b7 8154 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8155 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8156 vec_offset);
c3a8f964 8157 else
ab2fc782
RS
8158 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8159 stmt, bump);
c3a8f964 8160 if (mask)
929b4411 8161 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8162 }
ebfd146a 8163
0d0293ac 8164 if (grouped_load || slp_perm)
9771b263 8165 dr_chain.create (vec_num);
5ce1ee7f 8166
2de001ee 8167 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8168 {
272c6793
RS
8169 tree vec_array;
8170
8171 vec_array = create_vector_array (vectype, vec_num);
8172
7cfb4d93 8173 tree final_mask = NULL_TREE;
70088b95
RS
8174 if (loop_masks)
8175 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8176 vectype, j);
7cfb4d93
RS
8177 if (vec_mask)
8178 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8179 vec_mask, gsi);
8180
7e11fc7f 8181 gcall *call;
7cfb4d93 8182 if (final_mask)
7e11fc7f
RS
8183 {
8184 /* Emit:
8185 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8186 VEC_MASK). */
8187 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8188 tree alias_ptr = build_int_cst (ref_type, align);
8189 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8190 dataref_ptr, alias_ptr,
7cfb4d93 8191 final_mask);
7e11fc7f
RS
8192 }
8193 else
8194 {
8195 /* Emit:
8196 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8197 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8198 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8199 }
a844293d
RS
8200 gimple_call_set_lhs (call, vec_array);
8201 gimple_call_set_nothrow (call, true);
8202 new_stmt = call;
272c6793 8203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8204
272c6793
RS
8205 /* Extract each vector into an SSA_NAME. */
8206 for (i = 0; i < vec_num; i++)
ebfd146a 8207 {
272c6793
RS
8208 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8209 vec_array, i);
9771b263 8210 dr_chain.quick_push (new_temp);
272c6793
RS
8211 }
8212
8213 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8214 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8215
8216 /* Record that VEC_ARRAY is now dead. */
8217 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8218 }
8219 else
8220 {
8221 for (i = 0; i < vec_num; i++)
8222 {
7cfb4d93 8223 tree final_mask = NULL_TREE;
70088b95 8224 if (loop_masks
7cfb4d93 8225 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8226 final_mask = vect_get_loop_mask (gsi, loop_masks,
8227 vec_num * ncopies,
7cfb4d93
RS
8228 vectype, vec_num * j + i);
8229 if (vec_mask)
8230 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8231 vec_mask, gsi);
8232
272c6793
RS
8233 if (i > 0)
8234 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8235 stmt, bump);
272c6793
RS
8236
8237 /* 2. Create the vector-load in the loop. */
8238 switch (alignment_support_scheme)
8239 {
8240 case dr_aligned:
8241 case dr_unaligned_supported:
be1ac4ec 8242 {
644ffefd
MJ
8243 unsigned int align, misalign;
8244
bfaa08b7
RS
8245 if (memory_access_type == VMAT_GATHER_SCATTER)
8246 {
8247 tree scale = size_int (gs_info.scale);
8248 gcall *call;
70088b95 8249 if (loop_masks)
bfaa08b7
RS
8250 call = gimple_build_call_internal
8251 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8252 vec_offset, scale, final_mask);
8253 else
8254 call = gimple_build_call_internal
8255 (IFN_GATHER_LOAD, 3, dataref_ptr,
8256 vec_offset, scale);
8257 gimple_call_set_nothrow (call, true);
8258 new_stmt = call;
8259 data_ref = NULL_TREE;
8260 break;
8261 }
8262
f702e7d4 8263 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8264 if (alignment_support_scheme == dr_aligned)
8265 {
8266 gcc_assert (aligned_access_p (first_dr));
644ffefd 8267 misalign = 0;
272c6793
RS
8268 }
8269 else if (DR_MISALIGNMENT (first_dr) == -1)
8270 {
25f68d90 8271 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8272 misalign = 0;
272c6793
RS
8273 }
8274 else
c3a8f964 8275 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8276 if (dataref_offset == NULL_TREE
8277 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8278 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8279 align, misalign);
c3a8f964 8280
7cfb4d93 8281 if (final_mask)
c3a8f964
RS
8282 {
8283 align = least_bit_hwi (misalign | align);
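 /* The alignment that can be relied on is the least set bit of
    MISALIGN | ALIGN (e.g. a target alignment of 16 with misalignment 4
    only guarantees 4); it is encoded in the pointer operand of the
    IFN_MASK_LOAD call built below.  */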
8284 tree ptr = build_int_cst (ref_type, align);
8285 gcall *call
8286 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8287 dataref_ptr, ptr,
7cfb4d93 8288 final_mask);
c3a8f964
RS
8289 gimple_call_set_nothrow (call, true);
8290 new_stmt = call;
8291 data_ref = NULL_TREE;
8292 }
8293 else
8294 {
8295 data_ref
8296 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8297 dataref_offset
8298 ? dataref_offset
8299 : build_int_cst (ref_type, 0));
8300 if (alignment_support_scheme == dr_aligned)
8301 ;
8302 else if (DR_MISALIGNMENT (first_dr) == -1)
8303 TREE_TYPE (data_ref)
8304 = build_aligned_type (TREE_TYPE (data_ref),
8305 align * BITS_PER_UNIT);
8306 else
8307 TREE_TYPE (data_ref)
8308 = build_aligned_type (TREE_TYPE (data_ref),
8309 TYPE_ALIGN (elem_type));
8310 }
272c6793 8311 break;
be1ac4ec 8312 }
272c6793 8313 case dr_explicit_realign:
267d3070 8314 {
272c6793 8315 tree ptr, bump;
272c6793 8316
d88981fc 8317 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8318
8319 if (compute_in_loop)
8320 msq = vect_setup_realignment (first_stmt, gsi,
8321 &realignment_token,
8322 dr_explicit_realign,
8323 dataref_ptr, NULL);
8324
aed93b23
RB
8325 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8326 ptr = copy_ssa_name (dataref_ptr);
8327 else
8328 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8329 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8330 new_stmt = gimple_build_assign
8331 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8332 build_int_cst
8333 (TREE_TYPE (dataref_ptr),
f702e7d4 8334 -(HOST_WIDE_INT) align));
272c6793
RS
8335 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8336 data_ref
8337 = build2 (MEM_REF, vectype, ptr,
44fc7854 8338 build_int_cst (ref_type, 0));
19986382 8339 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8340 vec_dest = vect_create_destination_var (scalar_dest,
8341 vectype);
8342 new_stmt = gimple_build_assign (vec_dest, data_ref);
8343 new_temp = make_ssa_name (vec_dest, new_stmt);
8344 gimple_assign_set_lhs (new_stmt, new_temp);
8345 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8346 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8347 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8348 msq = new_temp;
8349
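 /* Compute the address of the last byte of the vector
    (DATAREF_PTR + VS * ELEM_SIZE - 1), align it downwards and load the
    second aligned chunk; the REALIGN_LOAD in step 3 below combines it
    (LSQ) with MSQ.  */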
d88981fc 8350 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8351 TYPE_SIZE_UNIT (elem_type));
d88981fc 8352 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8353 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8354 new_stmt = gimple_build_assign
8355 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8356 build_int_cst
f702e7d4 8357 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8358 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8359 gimple_assign_set_lhs (new_stmt, ptr);
8360 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8361 data_ref
8362 = build2 (MEM_REF, vectype, ptr,
44fc7854 8363 build_int_cst (ref_type, 0));
272c6793 8364 break;
267d3070 8365 }
272c6793 8366 case dr_explicit_realign_optimized:
f702e7d4
RS
8367 {
8368 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8369 new_temp = copy_ssa_name (dataref_ptr);
8370 else
8371 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8372 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8373 new_stmt = gimple_build_assign
8374 (new_temp, BIT_AND_EXPR, dataref_ptr,
8375 build_int_cst (TREE_TYPE (dataref_ptr),
8376 -(HOST_WIDE_INT) align));
8377 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8378 data_ref
8379 = build2 (MEM_REF, vectype, new_temp,
8380 build_int_cst (ref_type, 0));
8381 break;
8382 }
272c6793
RS
8383 default:
8384 gcc_unreachable ();
8385 }
ebfd146a 8386 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8387 /* DATA_REF is null if we've already built the statement. */
8388 if (data_ref)
19986382
RB
8389 {
8390 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8391 new_stmt = gimple_build_assign (vec_dest, data_ref);
8392 }
ebfd146a 8393 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8394 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8396
272c6793
RS
8397 /* 3. Handle explicit realignment if necessary/supported.
8398 Create in loop:
8399 vec_dest = realign_load (msq, lsq, realignment_token) */
8400 if (alignment_support_scheme == dr_explicit_realign_optimized
8401 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8402 {
272c6793
RS
8403 lsq = gimple_assign_lhs (new_stmt);
8404 if (!realignment_token)
8405 realignment_token = dataref_ptr;
8406 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8407 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8408 msq, lsq, realignment_token);
272c6793
RS
8409 new_temp = make_ssa_name (vec_dest, new_stmt);
8410 gimple_assign_set_lhs (new_stmt, new_temp);
8411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8412
8413 if (alignment_support_scheme == dr_explicit_realign_optimized)
8414 {
8415 gcc_assert (phi);
8416 if (i == vec_num - 1 && j == ncopies - 1)
8417 add_phi_arg (phi, lsq,
8418 loop_latch_edge (containing_loop),
9e227d60 8419 UNKNOWN_LOCATION);
272c6793
RS
8420 msq = lsq;
8421 }
ebfd146a 8422 }
ebfd146a 8423
59fd17e3
RB
8424 /* 4. Handle invariant-load. */
8425 if (inv_p && !bb_vinfo)
8426 {
59fd17e3 8427 gcc_assert (!grouped_load);
d1417442
JJ
8428 /* If we have versioned for aliasing or the loop doesn't
8429 have any data dependencies that would preclude this,
8430 then we are sure this is a loop invariant load and
8431 thus we can insert it on the preheader edge. */
8432 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8433 && !nested_in_vect_loop
6b916b36 8434 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8435 {
8436 if (dump_enabled_p ())
8437 {
8438 dump_printf_loc (MSG_NOTE, vect_location,
8439 "hoisting out of the vectorized "
8440 "loop: ");
8441 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8442 }
b731b390 8443 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8444 gsi_insert_on_edge_immediate
8445 (loop_preheader_edge (loop),
8446 gimple_build_assign (tem,
8447 unshare_expr
8448 (gimple_assign_rhs1 (stmt))));
8449 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8450 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8451 set_vinfo_for_stmt (new_stmt,
8452 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8453 }
8454 else
8455 {
8456 gimple_stmt_iterator gsi2 = *gsi;
8457 gsi_next (&gsi2);
8458 new_temp = vect_init_vector (stmt, scalar_dest,
8459 vectype, &gsi2);
34cd48e5 8460 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8461 }
59fd17e3
RB
8462 }
8463
62da9e14 8464 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8465 {
aec7ae7d
JJ
8466 tree perm_mask = perm_mask_for_reverse (vectype);
8467 new_temp = permute_vec_elements (new_temp, new_temp,
8468 perm_mask, stmt, gsi);
ebfd146a
IR
8469 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8470 }
267d3070 8471
272c6793 8472 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8473 vect_transform_grouped_load (). */
8474 if (grouped_load || slp_perm)
9771b263 8475 dr_chain.quick_push (new_temp);
267d3070 8476
272c6793
RS
8477 /* Store vector loads in the corresponding SLP_NODE. */
8478 if (slp && !slp_perm)
9771b263 8479 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8480
8481 /* With SLP permutation we load the gaps as well; without
8482 it we need to skip the gaps after we manage to fully load
2c53b149 8483 all elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8484 group_elt += nunits;
d9f21f6a
RS
8485 if (maybe_ne (group_gap_adj, 0U)
8486 && !slp_perm
8487 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8488 {
d9f21f6a
RS
8489 poly_wide_int bump_val
8490 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8491 * group_gap_adj);
8e6cdc90 8492 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8493 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8494 stmt, bump);
8495 group_elt = 0;
8496 }
272c6793 8497 }
9b999e8c
RB
8498 /* Bump the vector pointer to account for a gap or for excess
8499 elements loaded for a permuted SLP load. */
d9f21f6a 8500 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8501 {
d9f21f6a
RS
8502 poly_wide_int bump_val
8503 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8504 * group_gap_adj);
8e6cdc90 8505 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8506 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8507 stmt, bump);
8508 }
ebfd146a
IR
8509 }
8510
8511 if (slp && !slp_perm)
8512 continue;
8513
8514 if (slp_perm)
8515 {
29afecdf 8516 unsigned n_perms;
01d8bf07 8517 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8518 slp_node_instance, false,
8519 &n_perms))
ebfd146a 8520 {
9771b263 8521 dr_chain.release ();
ebfd146a
IR
8522 return false;
8523 }
8524 }
8525 else
8526 {
0d0293ac 8527 if (grouped_load)
ebfd146a 8528 {
2de001ee 8529 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8530 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8531 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8532 }
8533 else
8534 {
8535 if (j == 0)
8536 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8537 else
8538 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8539 prev_stmt_info = vinfo_for_stmt (new_stmt);
8540 }
8541 }
9771b263 8542 dr_chain.release ();
ebfd146a
IR
8543 }
8544
ebfd146a
IR
8545 return true;
8546}
8547
8548/* Function vect_is_simple_cond.
b8698a0f 8549
ebfd146a
IR
8550 Input:
8551 LOOP - the loop that is being vectorized.
8552 COND - Condition that is checked for simple use.
8553
e9e1d143
RG
8554 Output:
8555 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8556 *DTS - The def types for the arguments of the comparison
e9e1d143 8557
ebfd146a
IR
8558 Returns whether a COND can be vectorized. Checks whether
8559 condition operands are supportable using vect_is_simple_use. */
8560
87aab9b2 8561static bool
4fc5ebf1 8562vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8563 tree *comp_vectype, enum vect_def_type *dts,
8564 tree vectype)
ebfd146a
IR
8565{
8566 tree lhs, rhs;
e9e1d143 8567 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8568
a414c77f
IE
8569 /* Mask case. */
8570 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8571 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f 8572 {
894dd753 8573 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
a414c77f
IE
8574 || !*comp_vectype
8575 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8576 return false;
8577 return true;
8578 }
8579
ebfd146a
IR
8580 if (!COMPARISON_CLASS_P (cond))
8581 return false;
8582
8583 lhs = TREE_OPERAND (cond, 0);
8584 rhs = TREE_OPERAND (cond, 1);
8585
8586 if (TREE_CODE (lhs) == SSA_NAME)
8587 {
894dd753 8588 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
ebfd146a
IR
8589 return false;
8590 }
4fc5ebf1
JG
8591 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8592 || TREE_CODE (lhs) == FIXED_CST)
8593 dts[0] = vect_constant_def;
8594 else
ebfd146a
IR
8595 return false;
8596
8597 if (TREE_CODE (rhs) == SSA_NAME)
8598 {
894dd753 8599 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
ebfd146a
IR
8600 return false;
8601 }
4fc5ebf1
JG
8602 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8603 || TREE_CODE (rhs) == FIXED_CST)
8604 dts[1] = vect_constant_def;
8605 else
ebfd146a
IR
8606 return false;
8607
28b33016 8608 if (vectype1 && vectype2
928686b1
RS
8609 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8610 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8611 return false;
8612
e9e1d143 8613 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8614 /* Invariant comparison. */
4515e413 8615 if (! *comp_vectype && vectype)
8da4c8d8
RB
8616 {
8617 tree scalar_type = TREE_TYPE (lhs);
8618 /* If we can widen the comparison to match vectype, do so. */
8619 if (INTEGRAL_TYPE_P (scalar_type)
8620 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8621 TYPE_SIZE (TREE_TYPE (vectype))))
8622 scalar_type = build_nonstandard_integer_type
8623 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8624 TYPE_UNSIGNED (scalar_type));
8625 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8626 }
8627
ebfd146a
IR
8628 return true;
8629}
8630
8631/* vectorizable_condition.
8632
b8698a0f
L
8633 Check if STMT is a conditional modify expression that can be vectorized.
8634 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8635 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8636 at GSI.
8637
8638 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8639 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8640 the else clause if it is 2).
ebfd146a
IR
8641
8642 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8643
4bbe8262 8644bool
355fe088
TS
8645vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8646 gimple **vec_stmt, tree reduc_def, int reduc_index,
68435eb2 8647 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8648{
8649 tree scalar_dest = NULL_TREE;
8650 tree vec_dest = NULL_TREE;
01216d27
JJ
8651 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8652 tree then_clause, else_clause;
ebfd146a 8653 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8654 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8655 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8656 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8657 tree vec_compare;
ebfd146a
IR
8658 tree new_temp;
8659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8660 enum vect_def_type dts[4]
8661 = {vect_unknown_def_type, vect_unknown_def_type,
8662 vect_unknown_def_type, vect_unknown_def_type};
8663 int ndts = 4;
f7e531cf 8664 int ncopies;
01216d27 8665 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8666 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8667 int i, j;
8668 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8669 vec<tree> vec_oprnds0 = vNULL;
8670 vec<tree> vec_oprnds1 = vNULL;
8671 vec<tree> vec_oprnds2 = vNULL;
8672 vec<tree> vec_oprnds3 = vNULL;
74946978 8673 tree vec_cmp_type;
a414c77f 8674 bool masked = false;
b8698a0f 8675
f7e531cf
IR
8676 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8677 return false;
8678
bb6c2b68
RS
8679 vect_reduction_type reduction_type
8680 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8681 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8682 {
8683 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8684 return false;
ebfd146a 8685
af29617a
AH
8686 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8687 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8688 && reduc_def))
8689 return false;
ebfd146a 8690
af29617a
AH
8691 /* FORNOW: not yet supported. */
8692 if (STMT_VINFO_LIVE_P (stmt_info))
8693 {
8694 if (dump_enabled_p ())
8695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8696 "value used after loop.\n");
8697 return false;
8698 }
ebfd146a
IR
8699 }
8700
8701 /* Is vectorizable conditional operation? */
8702 if (!is_gimple_assign (stmt))
8703 return false;
8704
8705 code = gimple_assign_rhs_code (stmt);
8706
8707 if (code != COND_EXPR)
8708 return false;
8709
465c8c19 8710 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8711 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8712
fce57248 8713 if (slp_node)
465c8c19
JJ
8714 ncopies = 1;
8715 else
e8f142e2 8716 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8717
8718 gcc_assert (ncopies >= 1);
8719 if (reduc_index && ncopies > 1)
8720 return false; /* FORNOW */
8721
4e71066d
RG
8722 cond_expr = gimple_assign_rhs1 (stmt);
8723 then_clause = gimple_assign_rhs2 (stmt);
8724 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8725
4fc5ebf1 8726 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8727 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8728 || !comp_vectype)
ebfd146a
IR
8729 return false;
8730
894dd753 8731 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
2947d3b2 8732 return false;
894dd753 8733 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
ebfd146a 8734 return false;
2947d3b2
IE
8735
8736 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8737 return false;
8738
8739 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8740 return false;
8741
28b33016
IE
8742 masked = !COMPARISON_CLASS_P (cond_expr);
8743 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8744
74946978
MP
8745 if (vec_cmp_type == NULL_TREE)
8746 return false;
784fb9b3 8747
01216d27
JJ
8748 cond_code = TREE_CODE (cond_expr);
8749 if (!masked)
8750 {
8751 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8752 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8753 }
8754
8755 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8756 {
8757 /* Boolean values may have another representation in vectors
8758 and therefore we prefer bit operations over comparison for
8759 them (which also works for scalar masks). We store opcodes
8760 to use in bitop1 and bitop2. Statement is vectorized as
8761 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8762 depending on bitop1 and bitop2 arity. */
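 /* For instance, with single-bit boolean elements a > b is simply
    a & ~b, so GT_EXPR maps to bitop1 = BIT_NOT_EXPR (applied to the
    second operand) and bitop2 = BIT_AND_EXPR.  */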
8763 switch (cond_code)
8764 {
8765 case GT_EXPR:
8766 bitop1 = BIT_NOT_EXPR;
8767 bitop2 = BIT_AND_EXPR;
8768 break;
8769 case GE_EXPR:
8770 bitop1 = BIT_NOT_EXPR;
8771 bitop2 = BIT_IOR_EXPR;
8772 break;
8773 case LT_EXPR:
8774 bitop1 = BIT_NOT_EXPR;
8775 bitop2 = BIT_AND_EXPR;
8776 std::swap (cond_expr0, cond_expr1);
8777 break;
8778 case LE_EXPR:
8779 bitop1 = BIT_NOT_EXPR;
8780 bitop2 = BIT_IOR_EXPR;
8781 std::swap (cond_expr0, cond_expr1);
8782 break;
8783 case NE_EXPR:
8784 bitop1 = BIT_XOR_EXPR;
8785 break;
8786 case EQ_EXPR:
8787 bitop1 = BIT_XOR_EXPR;
8788 bitop2 = BIT_NOT_EXPR;
8789 break;
8790 default:
8791 return false;
8792 }
8793 cond_code = SSA_NAME;
8794 }
8795
b8698a0f 8796 if (!vec_stmt)
ebfd146a 8797 {
01216d27
JJ
8798 if (bitop1 != NOP_EXPR)
8799 {
8800 machine_mode mode = TYPE_MODE (comp_vectype);
8801 optab optab;
8802
8803 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8804 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8805 return false;
8806
8807 if (bitop2 != NOP_EXPR)
8808 {
8809 optab = optab_for_tree_code (bitop2, comp_vectype,
8810 optab_default);
8811 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8812 return false;
8813 }
8814 }
4fc5ebf1
JG
8815 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8816 cond_code))
8817 {
68435eb2
RB
8818 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8819 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8820 cost_vec);
4fc5ebf1
JG
8821 return true;
8822 }
8823 return false;
ebfd146a
IR
8824 }
8825
f7e531cf
IR
8826 /* Transform. */
8827
8828 if (!slp_node)
8829 {
9771b263
DN
8830 vec_oprnds0.create (1);
8831 vec_oprnds1.create (1);
8832 vec_oprnds2.create (1);
8833 vec_oprnds3.create (1);
f7e531cf 8834 }
ebfd146a
IR
8835
8836 /* Handle def. */
8837 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8838 if (reduction_type != EXTRACT_LAST_REDUCTION)
8839 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8840
8841 /* Handle cond expr. */
a855b1b1
MM
8842 for (j = 0; j < ncopies; j++)
8843 {
bb6c2b68 8844 gimple *new_stmt = NULL;
a855b1b1
MM
8845 if (j == 0)
8846 {
f7e531cf
IR
8847 if (slp_node)
8848 {
00f96dc9
TS
8849 auto_vec<tree, 4> ops;
8850 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8851
a414c77f 8852 if (masked)
01216d27 8853 ops.safe_push (cond_expr);
a414c77f
IE
8854 else
8855 {
01216d27
JJ
8856 ops.safe_push (cond_expr0);
8857 ops.safe_push (cond_expr1);
a414c77f 8858 }
9771b263
DN
8859 ops.safe_push (then_clause);
8860 ops.safe_push (else_clause);
306b0c92 8861 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8862 vec_oprnds3 = vec_defs.pop ();
8863 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8864 if (!masked)
8865 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8866 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8867 }
8868 else
8869 {
a414c77f
IE
8870 if (masked)
8871 {
8872 vec_cond_lhs
8873 = vect_get_vec_def_for_operand (cond_expr, stmt,
8874 comp_vectype);
894dd753 8875 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
a414c77f
IE
8876 }
8877 else
8878 {
01216d27
JJ
8879 vec_cond_lhs
8880 = vect_get_vec_def_for_operand (cond_expr0,
8881 stmt, comp_vectype);
894dd753 8882 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
01216d27
JJ
8883
8884 vec_cond_rhs
8885 = vect_get_vec_def_for_operand (cond_expr1,
8886 stmt, comp_vectype);
894dd753 8887 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
a414c77f 8888 }
f7e531cf
IR
8889 if (reduc_index == 1)
8890 vec_then_clause = reduc_def;
8891 else
8892 {
8893 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241 8894 stmt);
894dd753 8895 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
f7e531cf
IR
8896 }
8897 if (reduc_index == 2)
8898 vec_else_clause = reduc_def;
8899 else
8900 {
8901 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241 8902 stmt);
894dd753 8903 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
f7e531cf 8904 }
a855b1b1
MM
8905 }
8906 }
8907 else
8908 {
a414c77f
IE
8909 vec_cond_lhs
8910 = vect_get_vec_def_for_stmt_copy (dts[0],
8911 vec_oprnds0.pop ());
8912 if (!masked)
8913 vec_cond_rhs
8914 = vect_get_vec_def_for_stmt_copy (dts[1],
8915 vec_oprnds1.pop ());
8916
a855b1b1 8917 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8918 vec_oprnds2.pop ());
a855b1b1 8919 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8920 vec_oprnds3.pop ());
f7e531cf
IR
8921 }
8922
8923 if (!slp_node)
8924 {
9771b263 8925 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8926 if (!masked)
8927 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8928 vec_oprnds2.quick_push (vec_then_clause);
8929 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8930 }
8931
9dc3f7de 8932 /* Arguments are ready. Create the new vector stmt. */
9771b263 8933 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8934 {
9771b263
DN
8935 vec_then_clause = vec_oprnds2[i];
8936 vec_else_clause = vec_oprnds3[i];
a855b1b1 8937
a414c77f
IE
8938 if (masked)
8939 vec_compare = vec_cond_lhs;
8940 else
8941 {
8942 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8943 if (bitop1 == NOP_EXPR)
8944 vec_compare = build2 (cond_code, vec_cmp_type,
8945 vec_cond_lhs, vec_cond_rhs);
8946 else
8947 {
8948 new_temp = make_ssa_name (vec_cmp_type);
8949 if (bitop1 == BIT_NOT_EXPR)
8950 new_stmt = gimple_build_assign (new_temp, bitop1,
8951 vec_cond_rhs);
8952 else
8953 new_stmt
8954 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8955 vec_cond_rhs);
8956 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8957 if (bitop2 == NOP_EXPR)
8958 vec_compare = new_temp;
8959 else if (bitop2 == BIT_NOT_EXPR)
8960 {
8961 /* Instead of doing ~x ? y : z do x ? z : y. */
8962 vec_compare = new_temp;
8963 std::swap (vec_then_clause, vec_else_clause);
8964 }
8965 else
8966 {
8967 vec_compare = make_ssa_name (vec_cmp_type);
8968 new_stmt
8969 = gimple_build_assign (vec_compare, bitop2,
8970 vec_cond_lhs, new_temp);
8971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8972 }
8973 }
a414c77f 8974 }
bb6c2b68
RS
8975 if (reduction_type == EXTRACT_LAST_REDUCTION)
8976 {
8977 if (!is_gimple_val (vec_compare))
8978 {
8979 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8980 new_stmt = gimple_build_assign (vec_compare_name,
8981 vec_compare);
8982 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8983 vec_compare = vec_compare_name;
8984 }
8985 gcc_assert (reduc_index == 2);
8986 new_stmt = gimple_build_call_internal
8987 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8988 vec_then_clause);
8989 gimple_call_set_lhs (new_stmt, scalar_dest);
8990 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8991 if (stmt == gsi_stmt (*gsi))
8992 vect_finish_replace_stmt (stmt, new_stmt);
8993 else
8994 {
8995 /* In this case we're moving the definition to later in the
8996 block. That doesn't matter because the only uses of the
8997 lhs are in phi statements. */
8998 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8999 gsi_remove (&old_gsi, true);
9000 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9001 }
9002 }
9003 else
9004 {
9005 new_temp = make_ssa_name (vec_dest);
9006 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9007 vec_compare, vec_then_clause,
9008 vec_else_clause);
9009 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9010 }
f7e531cf 9011 if (slp_node)
9771b263 9012 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
9013 }
9014
9015 if (slp_node)
9016 continue;
9017
9018 if (j == 0)
9019 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9020 else
9021 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9022
9023 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 9024 }
b8698a0f 9025
9771b263
DN
9026 vec_oprnds0.release ();
9027 vec_oprnds1.release ();
9028 vec_oprnds2.release ();
9029 vec_oprnds3.release ();
f7e531cf 9030
ebfd146a
IR
9031 return true;
9032}
9033
42fd8198
IE
9034/* vectorizable_comparison.
9035
9036 Check if STMT is a comparison expression that can be vectorized.
9037 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9038 comparison, put it in VEC_STMT, and insert it at GSI.
9039
9040 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9041
fce57248 9042static bool
42fd8198
IE
9043vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9044 gimple **vec_stmt, tree reduc_def,
68435eb2 9045 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9046{
9047 tree lhs, rhs1, rhs2;
9048 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9049 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9050 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9051 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9052 tree new_temp;
9053 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9054 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9055 int ndts = 2;
928686b1 9056 poly_uint64 nunits;
42fd8198 9057 int ncopies;
49e76ff1 9058 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9059 stmt_vec_info prev_stmt_info = NULL;
9060 int i, j;
9061 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9062 vec<tree> vec_oprnds0 = vNULL;
9063 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9064 tree mask_type;
9065 tree mask;
9066
c245362b
IE
9067 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9068 return false;
9069
30480bcd 9070 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9071 return false;
9072
9073 mask_type = vectype;
9074 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9075
fce57248 9076 if (slp_node)
42fd8198
IE
9077 ncopies = 1;
9078 else
e8f142e2 9079 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9080
9081 gcc_assert (ncopies >= 1);
42fd8198
IE
9082 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9083 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9084 && reduc_def))
9085 return false;
9086
9087 if (STMT_VINFO_LIVE_P (stmt_info))
9088 {
9089 if (dump_enabled_p ())
9090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9091 "value used after loop.\n");
9092 return false;
9093 }
9094
9095 if (!is_gimple_assign (stmt))
9096 return false;
9097
9098 code = gimple_assign_rhs_code (stmt);
9099
9100 if (TREE_CODE_CLASS (code) != tcc_comparison)
9101 return false;
9102
9103 rhs1 = gimple_assign_rhs1 (stmt);
9104 rhs2 = gimple_assign_rhs2 (stmt);
9105
894dd753 9106 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
42fd8198
IE
9107 return false;
9108
894dd753 9109 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
42fd8198
IE
9110 return false;
9111
9112 if (vectype1 && vectype2
928686b1
RS
9113 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9114 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9115 return false;
9116
9117 vectype = vectype1 ? vectype1 : vectype2;
9118
9119 /* Invariant comparison. */
9120 if (!vectype)
9121 {
69a9a66f 9122 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9123 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9124 return false;
9125 }
928686b1 9126 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9127 return false;
9128
49e76ff1
IE
9129 /* Can't compare mask and non-mask types. */
9130 if (vectype1 && vectype2
9131 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9132 return false;
9133
9134 /* Boolean values may have another representation in vectors
9135 and therefore we prefer bit operations over comparison for
9136 them (which also works for scalar masks). We store opcodes
9137 to use in bitop1 and bitop2. Statement is vectorized as
9138 BITOP2 (rhs1 BITOP1 rhs2) or
9139 rhs1 BITOP2 (BITOP1 rhs2)
9140 depending on bitop1 and bitop2 arity. */
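 /* E.g. for boolean elements a < b equals b & ~a, which is obtained by
    swapping the operands and reusing the GT_EXPR mapping
    (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR).  */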
9141 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9142 {
9143 if (code == GT_EXPR)
9144 {
9145 bitop1 = BIT_NOT_EXPR;
9146 bitop2 = BIT_AND_EXPR;
9147 }
9148 else if (code == GE_EXPR)
9149 {
9150 bitop1 = BIT_NOT_EXPR;
9151 bitop2 = BIT_IOR_EXPR;
9152 }
9153 else if (code == LT_EXPR)
9154 {
9155 bitop1 = BIT_NOT_EXPR;
9156 bitop2 = BIT_AND_EXPR;
9157 std::swap (rhs1, rhs2);
264d951a 9158 std::swap (dts[0], dts[1]);
49e76ff1
IE
9159 }
9160 else if (code == LE_EXPR)
9161 {
9162 bitop1 = BIT_NOT_EXPR;
9163 bitop2 = BIT_IOR_EXPR;
9164 std::swap (rhs1, rhs2);
264d951a 9165 std::swap (dts[0], dts[1]);
49e76ff1
IE
9166 }
9167 else
9168 {
9169 bitop1 = BIT_XOR_EXPR;
9170 if (code == EQ_EXPR)
9171 bitop2 = BIT_NOT_EXPR;
9172 }
9173 }
9174
42fd8198
IE
9175 if (!vec_stmt)
9176 {
49e76ff1 9177 if (bitop1 == NOP_EXPR)
68435eb2
RB
9178 {
9179 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9180 return false;
9181 }
49e76ff1
IE
9182 else
9183 {
9184 machine_mode mode = TYPE_MODE (vectype);
9185 optab optab;
9186
9187 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9188 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9189 return false;
9190
9191 if (bitop2 != NOP_EXPR)
9192 {
9193 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9194 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9195 return false;
9196 }
49e76ff1 9197 }
68435eb2
RB
9198
9199 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9200 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9201 dts, ndts, slp_node, cost_vec);
9202 return true;
42fd8198
IE
9203 }
9204
9205 /* Transform. */
9206 if (!slp_node)
9207 {
9208 vec_oprnds0.create (1);
9209 vec_oprnds1.create (1);
9210 }
9211
9212 /* Handle def. */
9213 lhs = gimple_assign_lhs (stmt);
9214 mask = vect_create_destination_var (lhs, mask_type);
9215
9216 /* Handle cmp expr. */
9217 for (j = 0; j < ncopies; j++)
9218 {
9219 gassign *new_stmt = NULL;
9220 if (j == 0)
9221 {
9222 if (slp_node)
9223 {
9224 auto_vec<tree, 2> ops;
9225 auto_vec<vec<tree>, 2> vec_defs;
9226
9227 ops.safe_push (rhs1);
9228 ops.safe_push (rhs2);
306b0c92 9229 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9230 vec_oprnds1 = vec_defs.pop ();
9231 vec_oprnds0 = vec_defs.pop ();
9232 }
9233 else
9234 {
e4af0bc4
IE
9235 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9236 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9237 }
9238 }
9239 else
9240 {
9241 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9242 vec_oprnds0.pop ());
9243 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9244 vec_oprnds1.pop ());
9245 }
9246
9247 if (!slp_node)
9248 {
9249 vec_oprnds0.quick_push (vec_rhs1);
9250 vec_oprnds1.quick_push (vec_rhs2);
9251 }
9252
9253 /* Arguments are ready. Create the new vector stmt. */
9254 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9255 {
9256 vec_rhs2 = vec_oprnds1[i];
9257
9258 new_temp = make_ssa_name (mask);
49e76ff1
IE
9259 if (bitop1 == NOP_EXPR)
9260 {
9261 new_stmt = gimple_build_assign (new_temp, code,
9262 vec_rhs1, vec_rhs2);
9263 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9264 }
9265 else
9266 {
9267 if (bitop1 == BIT_NOT_EXPR)
9268 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9269 else
9270 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9271 vec_rhs2);
9272 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9273 if (bitop2 != NOP_EXPR)
9274 {
9275 tree res = make_ssa_name (mask);
9276 if (bitop2 == BIT_NOT_EXPR)
9277 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9278 else
9279 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9280 new_temp);
9281 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9282 }
9283 }
42fd8198
IE
9284 if (slp_node)
9285 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9286 }
9287
9288 if (slp_node)
9289 continue;
9290
9291 if (j == 0)
9292 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9293 else
9294 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9295
9296 prev_stmt_info = vinfo_for_stmt (new_stmt);
9297 }
9298
9299 vec_oprnds0.release ();
9300 vec_oprnds1.release ();
9301
9302 return true;
9303}
ebfd146a 9304
68a0f2ff
RS
9305/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9306 can handle all live statements in the node. Otherwise return true
9307 if STMT is not live or if vectorizable_live_operation can handle it.
9308 GSI and VEC_STMT are as for vectorizable_live_operation. */
9309
9310static bool
9311can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
9312 slp_tree slp_node, gimple **vec_stmt,
9313 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9314{
9315 if (slp_node)
9316 {
9317 gimple *slp_stmt;
9318 unsigned int i;
9319 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9320 {
9321 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9322 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9323 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
68435eb2 9324 vec_stmt, cost_vec))
68a0f2ff
RS
9325 return false;
9326 }
9327 }
9328 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9329 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9330 cost_vec))
68a0f2ff
RS
9331 return false;
9332
9333 return true;
9334}
9335
8644a673 9336/* Make sure the statement is vectorizable. */
ebfd146a
IR
9337
9338bool
891ad31c 9339vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9340 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9341{
8644a673 9342 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9343 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9344 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9345 bool ok;
355fe088 9346 gimple *pattern_stmt;
363477c0 9347 gimple_seq pattern_def_seq;
ebfd146a 9348
73fbfcad 9349 if (dump_enabled_p ())
ebfd146a 9350 {
78c60e3d
SS
9351 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9352 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9353 }
ebfd146a 9354
1825a1f3 9355 if (gimple_has_volatile_ops (stmt))
b8698a0f 9356 {
73fbfcad 9357 if (dump_enabled_p ())
78c60e3d 9358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9359 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9360
9361 return false;
9362 }
b8698a0f 9363
d54a098e
RS
9364 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9365 && node == NULL
9366 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9367 {
9368 gimple_stmt_iterator si;
9369
9370 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9371 {
9372 gimple *pattern_def_stmt = gsi_stmt (si);
9373 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9374 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9375 {
9376 /* Analyze def stmt of STMT if it's a pattern stmt. */
9377 if (dump_enabled_p ())
9378 {
9379 dump_printf_loc (MSG_NOTE, vect_location,
9380 "==> examining pattern def statement: ");
9381 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9382 }
9383
9384 if (!vect_analyze_stmt (pattern_def_stmt,
9385 need_to_vectorize, node, node_instance,
9386 cost_vec))
9387 return false;
9388 }
9389 }
9390 }
9391
b8698a0f 9392 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9393 to include:
9394 - the COND_EXPR which is the loop exit condition
9395 - any LABEL_EXPRs in the loop
b8698a0f 9396 - computations that are used only for array indexing or loop control.
8644a673 9397 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9398 instance, therefore, all the statements are relevant.
ebfd146a 9399
d092494c 9400 A pattern statement needs to be analyzed instead of the original statement
83197f37 9401 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9402 statements. In basic blocks we are called from some SLP instance
9403 traversal; we don't analyze pattern stmts instead, as the pattern stmts
9404 will already be part of an SLP instance. */
83197f37
IR
9405
9406 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9407 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9408 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9409 {
9d5e7640 9410 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9411 && pattern_stmt
9d5e7640
IR
9412 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9413 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9414 {
83197f37 9415 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9416 stmt = pattern_stmt;
9417 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9418 if (dump_enabled_p ())
9d5e7640 9419 {
78c60e3d
SS
9420 dump_printf_loc (MSG_NOTE, vect_location,
9421 "==> examining pattern statement: ");
9422 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9423 }
9424 }
9425 else
9426 {
73fbfcad 9427 if (dump_enabled_p ())
e645e942 9428 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9429
9d5e7640
IR
9430 return true;
9431 }
8644a673 9432 }
83197f37 9433 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9434 && node == NULL
83197f37
IR
9435 && pattern_stmt
9436 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9437 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9438 {
9439 /* Analyze PATTERN_STMT too. */
73fbfcad 9440 if (dump_enabled_p ())
83197f37 9441 {
78c60e3d
SS
9442 dump_printf_loc (MSG_NOTE, vect_location,
9443 "==> examining pattern statement: ");
9444 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9445 }
9446
891ad31c 9447 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
68435eb2 9448 node_instance, cost_vec))
83197f37
IR
9449 return false;
9450 }
ebfd146a 9451
8644a673
IR
9452 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9453 {
9454 case vect_internal_def:
9455 break;
ebfd146a 9456
8644a673 9457 case vect_reduction_def:
7c5222ff 9458 case vect_nested_cycle:
14a61437
RB
9459 gcc_assert (!bb_vinfo
9460 && (relevance == vect_used_in_outer
9461 || relevance == vect_used_in_outer_by_reduction
9462 || relevance == vect_used_by_reduction
b28ead45
AH
9463 || relevance == vect_unused_in_scope
9464 || relevance == vect_used_only_live));
8644a673
IR
9465 break;
9466
9467 case vect_induction_def:
e7baeb39
RB
9468 gcc_assert (!bb_vinfo);
9469 break;
9470
8644a673
IR
9471 case vect_constant_def:
9472 case vect_external_def:
9473 case vect_unknown_def_type:
9474 default:
9475 gcc_unreachable ();
9476 }
ebfd146a 9477
8644a673 9478 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9479 {
8644a673 9480 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9481 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9482 || (is_gimple_call (stmt)
9483 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9484 *need_to_vectorize = true;
ebfd146a
IR
9485 }
9486
b1af7da6
RB
9487 if (PURE_SLP_STMT (stmt_info) && !node)
9488 {
9489 dump_printf_loc (MSG_NOTE, vect_location,
9490 "handled only by SLP analysis\n");
9491 return true;
9492 }
9493
9494 ok = true;
9495 if (!bb_vinfo
9496 && (STMT_VINFO_RELEVANT_P (stmt_info)
9497 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2
RB
9498 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9499 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9500 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9501 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9502 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9503 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9504 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9505 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9506 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9507 cost_vec)
9508 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9509 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9510 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6
RB
9511 else
9512 {
9513 if (bb_vinfo)
68435eb2
RB
9514 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9515 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9516 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9517 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9518 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9519 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9520 cost_vec)
9521 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9522 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9523 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9524 cost_vec)
9525 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9526 cost_vec));
b1af7da6 9527 }
8644a673
IR
9528
9529 if (!ok)
ebfd146a 9530 {
73fbfcad 9531 if (dump_enabled_p ())
8644a673 9532 {
78c60e3d
SS
9533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9534 "not vectorized: relevant stmt not ");
9535 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9536 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9537 }
b8698a0f 9538
ebfd146a
IR
9539 return false;
9540 }
9541
8644a673
IR
 9542	 /* Stmts that are (also) "live" (i.e. used outside the loop)
9543 need extra handling, except for vectorizable reductions. */
68435eb2
RB
9544 if (!bb_vinfo
9545 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9546 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9547 {
73fbfcad 9548 if (dump_enabled_p ())
8644a673 9549 {
78c60e3d 9550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9551 "not vectorized: live stmt not supported: ");
78c60e3d 9552 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9553 }
b8698a0f 9554
8644a673 9555 return false;
ebfd146a
IR
9556 }
9557
ebfd146a
IR
9558 return true;
9559}
9560
9561
9562/* Function vect_transform_stmt.
9563
 9564	 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9565
9566bool
355fe088 9567vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9568 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9569 slp_instance slp_node_instance)
9570{
9571 bool is_store = false;
355fe088 9572 gimple *vec_stmt = NULL;
ebfd146a 9573 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9574 bool done;
ebfd146a 9575
fce57248 9576 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9577 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9578
e57d9a82
RB
9579 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9580 && nested_in_vect_loop_p
9581 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9582 stmt));
9583
ebfd146a
IR
9584 switch (STMT_VINFO_TYPE (stmt_info))
9585 {
9586 case type_demotion_vec_info_type:
ebfd146a 9587 case type_promotion_vec_info_type:
ebfd146a 9588 case type_conversion_vec_info_type:
68435eb2 9589 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9590 gcc_assert (done);
9591 break;
9592
9593 case induc_vec_info_type:
68435eb2 9594 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9595 gcc_assert (done);
9596 break;
9597
9dc3f7de 9598 case shift_vec_info_type:
68435eb2 9599 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de
IR
9600 gcc_assert (done);
9601 break;
9602
ebfd146a 9603 case op_vec_info_type:
68435eb2 9604 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9605 gcc_assert (done);
9606 break;
9607
9608 case assignment_vec_info_type:
68435eb2 9609 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9610 gcc_assert (done);
9611 break;
9612
9613 case load_vec_info_type:
b8698a0f 9614 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9615 slp_node_instance, NULL);
ebfd146a
IR
9616 gcc_assert (done);
9617 break;
9618
9619 case store_vec_info_type:
68435eb2 9620 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9621 gcc_assert (done);
0d0293ac 9622 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9623 {
9624 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9625 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 9626	 one are skipped, and their vec_stmt_info shouldn't be freed
9627 meanwhile. */
0d0293ac 9628 *grouped_store = true;
f307441a 9629 stmt_vec_info group_info
2c53b149
RB
9630 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9631 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9632 is_store = true;
f307441a 9633 }
ebfd146a
IR
9634 else
9635 is_store = true;
9636 break;
9637
9638 case condition_vec_info_type:
68435eb2 9639 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a
IR
9640 gcc_assert (done);
9641 break;
9642
42fd8198 9643 case comparison_vec_info_type:
68435eb2 9644 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198
IE
9645 gcc_assert (done);
9646 break;
9647
ebfd146a 9648 case call_vec_info_type:
68435eb2 9649 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9650 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9651 break;
9652
0136f8f0 9653 case call_simd_clone_vec_info_type:
68435eb2 9654 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0
AH
9655 stmt = gsi_stmt (*gsi);
9656 break;
9657
ebfd146a 9658 case reduc_vec_info_type:
891ad31c 9659 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9660 slp_node_instance, NULL);
ebfd146a
IR
9661 gcc_assert (done);
9662 break;
9663
9664 default:
9665 if (!STMT_VINFO_LIVE_P (stmt_info))
9666 {
73fbfcad 9667 if (dump_enabled_p ())
78c60e3d 9668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9669 "stmt not supported.\n");
ebfd146a
IR
9670 gcc_unreachable ();
9671 }
9672 }
9673
225ce44b
RB
9674 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9675 This would break hybrid SLP vectorization. */
9676 if (slp_node)
d90f8440
RB
9677 gcc_assert (!vec_stmt
9678 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9679
ebfd146a
IR
9680 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9681 is being vectorized, but outside the immediately enclosing loop. */
9682 if (vec_stmt
e57d9a82 9683 && nested_p
ebfd146a
IR
9684 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9685 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9686 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9687 vect_used_in_outer_by_reduction))
ebfd146a 9688 {
a70d6342
IR
9689 struct loop *innerloop = LOOP_VINFO_LOOP (
9690 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9691 imm_use_iterator imm_iter;
9692 use_operand_p use_p;
9693 tree scalar_dest;
355fe088 9694 gimple *exit_phi;
ebfd146a 9695
73fbfcad 9696 if (dump_enabled_p ())
78c60e3d 9697 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9698 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9699
 9700	 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9701 (to be used when vectorizing outer-loop stmts that use the DEF of
9702 STMT). */
9703 if (gimple_code (stmt) == GIMPLE_PHI)
9704 scalar_dest = PHI_RESULT (stmt);
9705 else
9706 scalar_dest = gimple_assign_lhs (stmt);
9707
9708 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9709 {
9710 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9711 {
9712 exit_phi = USE_STMT (use_p);
9713 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9714 }
9715 }
9716 }
9717
9718 /* Handle stmts whose DEF is used outside the loop-nest that is
9719 being vectorized. */
68a0f2ff 9720 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9721 {
68435eb2 9722 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a
IR
9723 gcc_assert (done);
9724 }
9725
9726 if (vec_stmt)
83197f37 9727 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9728
b8698a0f 9729 return is_store;
ebfd146a
IR
9730}
9731
9732
b8698a0f 9733/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9734 stmt_vec_info. */
9735
9736void
355fe088 9737vect_remove_stores (gimple *first_stmt)
ebfd146a 9738{
355fe088
TS
9739 gimple *next = first_stmt;
9740 gimple *tmp;
ebfd146a
IR
9741 gimple_stmt_iterator next_si;
9742
9743 while (next)
9744 {
78048b1c
JJ
9745 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9746
2c53b149 9747 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
78048b1c
JJ
9748 if (is_pattern_stmt_p (stmt_info))
9749 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9750 /* Free the attached stmt_vec_info and remove the stmt. */
9751 next_si = gsi_for_stmt (next);
3d3f2249 9752 unlink_stmt_vdef (next);
ebfd146a 9753 gsi_remove (&next_si, true);
3d3f2249 9754 release_defs (next);
ebfd146a
IR
9755 free_stmt_vec_info (next);
9756 next = tmp;
9757 }
9758}
9759
9760
9761/* Function new_stmt_vec_info.
9762
9763 Create and initialize a new stmt_vec_info struct for STMT. */
9764
9765stmt_vec_info
310213d4 9766new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9767{
9768 stmt_vec_info res;
9769 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9770
9771 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9772 STMT_VINFO_STMT (res) = stmt;
310213d4 9773 res->vinfo = vinfo;
8644a673 9774 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9775 STMT_VINFO_LIVE_P (res) = false;
9776 STMT_VINFO_VECTYPE (res) = NULL;
9777 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9778 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9779 STMT_VINFO_IN_PATTERN_P (res) = false;
9780 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9781 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9782 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9783 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9784 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9785
ebfd146a
IR
9786 if (gimple_code (stmt) == GIMPLE_PHI
9787 && is_loop_header_bb_p (gimple_bb (stmt)))
9788 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9789 else
8644a673
IR
9790 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9791
9771b263 9792 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9793 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9794 STMT_VINFO_NUM_SLP_USES (res) = 0;
9795
2c53b149
RB
9796 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9797 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9798 res->size = 0; /* GROUP_SIZE */
9799 res->store_count = 0; /* GROUP_STORE_COUNT */
9800 res->gap = 0; /* GROUP_GAP */
9801 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a 9802
ca823c85
RB
9803 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9804 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9805
ebfd146a
IR
9806 return res;
9807}
9808
9809
f8c0baaf 9810/* Set the current stmt_vec_info vector to V. */
ebfd146a
IR
9811
9812void
f8c0baaf 9813set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9814{
f8c0baaf 9815 stmt_vec_info_vec = v;
ebfd146a
IR
9816}
9817
f8c0baaf 9818/* Free the stmt_vec_info entries in V and release V. */
ebfd146a
IR
9819
9820void
f8c0baaf 9821free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9822{
93675444 9823 unsigned int i;
3161455c 9824 stmt_vec_info info;
f8c0baaf 9825 FOR_EACH_VEC_ELT (*v, i, info)
93675444 9826 if (info != NULL)
3161455c 9827 free_stmt_vec_info (STMT_VINFO_STMT (info));
f8c0baaf
RB
9828 if (v == stmt_vec_info_vec)
9829 stmt_vec_info_vec = NULL;
9830 v->release ();
ebfd146a
IR
9831}
9832
9833
9834/* Free stmt vectorization related info. */
9835
9836void
355fe088 9837free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9838{
9839 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9840
9841 if (!stmt_info)
9842 return;
9843
78048b1c
JJ
9844 /* Check if this statement has a related "pattern stmt"
9845 (introduced by the vectorizer during the pattern recognition
9846 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9847 too. */
9848 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9849 {
e3947d80
RS
9850 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9851 for (gimple_stmt_iterator si = gsi_start (seq);
9852 !gsi_end_p (si); gsi_next (&si))
9853 {
9854 gimple *seq_stmt = gsi_stmt (si);
9855 gimple_set_bb (seq_stmt, NULL);
9856 tree lhs = gimple_get_lhs (seq_stmt);
9857 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9858 release_ssa_name (lhs);
9859 free_stmt_vec_info (seq_stmt);
9860 }
78048b1c
JJ
9861 stmt_vec_info patt_info
9862 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9863 if (patt_info)
9864 {
355fe088 9865 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9866 gimple_set_bb (patt_stmt, NULL);
9867 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9868 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9869 release_ssa_name (lhs);
f0281fde 9870 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9871 }
9872 }
9873
9771b263 9874 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9875 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9876 set_vinfo_for_stmt (stmt, NULL);
9877 free (stmt_info);
9878}
9879
9880
bb67d9c7 9881/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9882
bb67d9c7 9883 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9884 by the target. */
9885
c803b2a9 9886tree
86e36728 9887get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9888{
c7d97b28 9889 tree orig_scalar_type = scalar_type;
3bd8f481 9890 scalar_mode inner_mode;
ef4bddc2 9891 machine_mode simd_mode;
86e36728 9892 poly_uint64 nunits;
ebfd146a
IR
9893 tree vectype;
9894
3bd8f481
RS
9895 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9896 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9897 return NULL_TREE;
9898
3bd8f481 9899 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9900
7b7b1813
RG
 9901	 /* For vector types of elements whose mode precision doesn't
 9902	 match their type's precision we use an element type of mode
9903 precision. The vectorization routines will have to make sure
48f2e373
RB
9904 they support the proper result truncation/extension.
9905 We also make sure to build vector types with INTEGER_TYPE
9906 component type only. */
6d7971b8 9907 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9908 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9909 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9910 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9911 TYPE_UNSIGNED (scalar_type));
6d7971b8 9912
ccbf5bb4
RG
9913 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9914 When the component mode passes the above test simply use a type
9915 corresponding to that mode. The theory is that any use that
9916 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9917 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9918 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9919 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9920
9921 /* We can't build a vector type of elements with alignment bigger than
9922 their size. */
dfc2e2ac 9923 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9924 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9925 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9926
dfc2e2ac
RB
 9927	 /* If we fell back to using the mode, fail if there was
9928 no scalar type for it. */
9929 if (scalar_type == NULL_TREE)
9930 return NULL_TREE;
9931
bb67d9c7
RG
 9932	 /* If no size was supplied, use the mode the target prefers. Otherwise
 9933	 look up a vector mode of the specified size. */
86e36728 9934 if (known_eq (size, 0U))
bb67d9c7 9935 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9936 else if (!multiple_p (size, nbytes, &nunits)
9937 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9938 return NULL_TREE;
4c8fd8ac 9939 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9940 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9941 return NULL_TREE;
ebfd146a
IR
9942
9943 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9944
9945 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9946 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9947 return NULL_TREE;
ebfd146a 9948
c7d97b28
RB
9949 /* Re-attach the address-space qualifier if we canonicalized the scalar
9950 type. */
9951 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9952 return build_qualified_type
9953 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9954
ebfd146a
IR
9955 return vectype;
9956}
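
/* Illustrative sketch, not part of the GCC sources: the lane-count
   arithmetic performed above, with a made-up helper name.  Given the
   element size in bytes and the requested total vector size in bytes,
   the number of lanes is the exact quotient, otherwise no vector type
   exists.  E.g. 4-byte ints with a 16-byte vector size give 4 lanes
   (a V4SI-style type).  */
static unsigned int
sketch_vector_lanes (unsigned int elem_bytes, unsigned int vector_bytes)
{
  if (elem_bytes == 0 || vector_bytes % elem_bytes != 0)
    return 0;  /* No suitable vector type.  */
  return vector_bytes / elem_bytes;
}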
9957
86e36728 9958poly_uint64 current_vector_size;
bb67d9c7
RG
9959
9960/* Function get_vectype_for_scalar_type.
9961
9962 Returns the vector type corresponding to SCALAR_TYPE as supported
9963 by the target. */
9964
9965tree
9966get_vectype_for_scalar_type (tree scalar_type)
9967{
9968 tree vectype;
9969 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9970 current_vector_size);
9971 if (vectype
86e36728 9972 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9973 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9974 return vectype;
9975}
9976
42fd8198
IE
9977/* Function get_mask_type_for_scalar_type.
9978
9979 Returns the mask type corresponding to a result of comparison
9980 of vectors of specified SCALAR_TYPE as supported by target. */
9981
9982tree
9983get_mask_type_for_scalar_type (tree scalar_type)
9984{
9985 tree vectype = get_vectype_for_scalar_type (scalar_type);
9986
9987 if (!vectype)
9988 return NULL;
9989
9990 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9991 current_vector_size);
9992}
9993
b690cc0f
RG
9994/* Function get_same_sized_vectype
9995
 9996	 Returns a vector type corresponding to SCALAR_TYPE with the same
 9997	 size as VECTOR_TYPE, if supported by the target. */
9998
9999tree
bb67d9c7 10000get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10001{
2568d8a1 10002 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10003 return build_same_sized_truth_vector_type (vector_type);
10004
bb67d9c7
RG
10005 return get_vectype_for_scalar_type_and_size
10006 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10007}
10008
ebfd146a
IR
10009/* Function vect_is_simple_use.
10010
10011 Input:
81c40241
RB
10012 VINFO - the vect info of the loop or basic block that is being vectorized.
10013 OPERAND - operand in the loop or bb.
10014 Output:
894dd753 10015 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
81c40241 10016 DT - the type of definition
ebfd146a
IR
10017
10018 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10019 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10020 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10021 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
10022 is the case in reduction/induction computations).
10023 For basic blocks, supportable operands are constants and bb invariants.
10024 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
10025
10026bool
894dd753
RS
10027vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10028 gimple **def_stmt_out)
b8698a0f 10029{
894dd753
RS
10030 if (def_stmt_out)
10031 *def_stmt_out = NULL;
3fc356dc 10032 *dt = vect_unknown_def_type;
b8698a0f 10033
73fbfcad 10034 if (dump_enabled_p ())
ebfd146a 10035 {
78c60e3d
SS
10036 dump_printf_loc (MSG_NOTE, vect_location,
10037 "vect_is_simple_use: operand ");
30f502ed
RB
10038 if (TREE_CODE (operand) == SSA_NAME
10039 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10040 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10041 else
10042 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 10043 }
b8698a0f 10044
b758f602 10045 if (CONSTANT_CLASS_P (operand))
30f502ed
RB
10046 *dt = vect_constant_def;
10047 else if (is_gimple_min_invariant (operand))
10048 *dt = vect_external_def;
10049 else if (TREE_CODE (operand) != SSA_NAME)
10050 *dt = vect_unknown_def_type;
10051 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
8644a673 10052 *dt = vect_external_def;
ebfd146a
IR
10053 else
10054 {
30f502ed
RB
10055 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10056 if (! vect_stmt_in_region_p (vinfo, def_stmt))
10057 *dt = vect_external_def;
10058 else
0f8c840c 10059 {
30f502ed
RB
10060 stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
10061 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10062 {
10063 def_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10064 stmt_vinfo = vinfo_for_stmt (def_stmt);
10065 }
10066 switch (gimple_code (def_stmt))
10067 {
10068 case GIMPLE_PHI:
10069 case GIMPLE_ASSIGN:
10070 case GIMPLE_CALL:
10071 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10072 break;
10073 default:
10074 *dt = vect_unknown_def_type;
10075 break;
10076 }
0f8c840c 10077 }
30f502ed
RB
10078 if (def_stmt_out)
10079 *def_stmt_out = def_stmt;
ebfd146a
IR
10080 }
10081
2e8ab70c
RB
10082 if (dump_enabled_p ())
10083 {
30f502ed 10084 dump_printf (MSG_NOTE, ", type of def: ");
2e8ab70c
RB
10085 switch (*dt)
10086 {
10087 case vect_uninitialized_def:
10088 dump_printf (MSG_NOTE, "uninitialized\n");
10089 break;
10090 case vect_constant_def:
10091 dump_printf (MSG_NOTE, "constant\n");
10092 break;
10093 case vect_external_def:
10094 dump_printf (MSG_NOTE, "external\n");
10095 break;
10096 case vect_internal_def:
10097 dump_printf (MSG_NOTE, "internal\n");
10098 break;
10099 case vect_induction_def:
10100 dump_printf (MSG_NOTE, "induction\n");
10101 break;
10102 case vect_reduction_def:
10103 dump_printf (MSG_NOTE, "reduction\n");
10104 break;
10105 case vect_double_reduction_def:
10106 dump_printf (MSG_NOTE, "double reduction\n");
10107 break;
10108 case vect_nested_cycle:
10109 dump_printf (MSG_NOTE, "nested cycle\n");
10110 break;
10111 case vect_unknown_def_type:
10112 dump_printf (MSG_NOTE, "unknown\n");
10113 break;
10114 }
10115 }
10116
81c40241 10117 if (*dt == vect_unknown_def_type)
ebfd146a 10118 {
73fbfcad 10119 if (dump_enabled_p ())
78c60e3d 10120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10121 "Unsupported pattern.\n");
ebfd146a
IR
10122 return false;
10123 }
10124
ebfd146a
IR
10125 return true;
10126}
10127
81c40241 10128/* Function vect_is_simple_use.
b690cc0f 10129
81c40241 10130 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10131 type of OPERAND and stores it to *VECTYPE. If the definition of
10132 OPERAND is vect_uninitialized_def, vect_constant_def or
10133 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10134 is responsible to compute the best suited vector type for the
10135 scalar operand. */
10136
10137bool
894dd753
RS
10138vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10139 tree *vectype, gimple **def_stmt_out)
b690cc0f 10140{
894dd753
RS
10141 gimple *def_stmt;
10142 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt))
b690cc0f
RG
10143 return false;
10144
894dd753
RS
10145 if (def_stmt_out)
10146 *def_stmt_out = def_stmt;
10147
b690cc0f
RG
10148 /* Now get a vector type if the def is internal, otherwise supply
10149 NULL_TREE and leave it up to the caller to figure out a proper
10150 type for the use stmt. */
10151 if (*dt == vect_internal_def
10152 || *dt == vect_induction_def
10153 || *dt == vect_reduction_def
10154 || *dt == vect_double_reduction_def
10155 || *dt == vect_nested_cycle)
10156 {
894dd753 10157 stmt_vec_info stmt_info = vinfo_for_stmt (def_stmt);
b690cc0f
RG
10158 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10159 gcc_assert (*vectype != NULL_TREE);
30f502ed
RB
10160 if (dump_enabled_p ())
10161 {
10162 dump_printf_loc (MSG_NOTE, vect_location,
10163 "vect_is_simple_use: vectype ");
10164 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10165 dump_printf (MSG_NOTE, "\n");
10166 }
b690cc0f
RG
10167 }
10168 else if (*dt == vect_uninitialized_def
10169 || *dt == vect_constant_def
10170 || *dt == vect_external_def)
10171 *vectype = NULL_TREE;
10172 else
10173 gcc_unreachable ();
10174
10175 return true;
10176}
10177
ebfd146a
IR
10178
10179/* Function supportable_widening_operation
10180
b8698a0f
L
10181 Check whether an operation represented by the code CODE is a
10182 widening operation that is supported by the target platform in
b690cc0f
RG
10183 vector form (i.e., when operating on arguments of type VECTYPE_IN
10184 producing a result of type VECTYPE_OUT).
b8698a0f 10185
1bda738b
JJ
10186 Widening operations we currently support are NOP (CONVERT), FLOAT,
10187 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10188 are supported by the target platform either directly (via vector
10189 tree-codes), or via target builtins.
ebfd146a
IR
10190
10191 Output:
b8698a0f
L
10192 - CODE1 and CODE2 are codes of vector operations to be used when
10193 vectorizing the operation, if available.
ebfd146a
IR
10194 - MULTI_STEP_CVT determines the number of required intermediate steps in
10195 case of multi-step conversion (like char->short->int - in that case
10196 MULTI_STEP_CVT will be 1).
b8698a0f
L
10197 - INTERM_TYPES contains the intermediate type required to perform the
10198 widening operation (short in the above example). */
ebfd146a
IR
10199
10200bool
355fe088 10201supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10202 tree vectype_out, tree vectype_in,
ebfd146a
IR
10203 enum tree_code *code1, enum tree_code *code2,
10204 int *multi_step_cvt,
9771b263 10205 vec<tree> *interm_types)
ebfd146a
IR
10206{
10207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10208 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10209 struct loop *vect_loop = NULL;
ef4bddc2 10210 machine_mode vec_mode;
81f40b79 10211 enum insn_code icode1, icode2;
ebfd146a 10212 optab optab1, optab2;
b690cc0f
RG
10213 tree vectype = vectype_in;
10214 tree wide_vectype = vectype_out;
ebfd146a 10215 enum tree_code c1, c2;
4a00c761
JJ
10216 int i;
10217 tree prev_type, intermediate_type;
ef4bddc2 10218 machine_mode intermediate_mode, prev_mode;
4a00c761 10219 optab optab3, optab4;
ebfd146a 10220
4a00c761 10221 *multi_step_cvt = 0;
4ef69dfc
IR
10222 if (loop_info)
10223 vect_loop = LOOP_VINFO_LOOP (loop_info);
10224
ebfd146a
IR
10225 switch (code)
10226 {
10227 case WIDEN_MULT_EXPR:
6ae6116f
RH
10228 /* The result of a vectorized widening operation usually requires
10229 two vectors (because the widened results do not fit into one vector).
10230 The generated vector results would normally be expected to be
10231 generated in the same order as in the original scalar computation,
10232 i.e. if 8 results are generated in each vector iteration, they are
10233 to be organized as follows:
10234 vect1: [res1,res2,res3,res4],
10235 vect2: [res5,res6,res7,res8].
10236
10237 However, in the special case that the result of the widening
10238 operation is used in a reduction computation only, the order doesn't
10239 matter (because when vectorizing a reduction we change the order of
10240 the computation). Some targets can take advantage of this and
10241 generate more efficient code. For example, targets like Altivec,
10242 that support widen_mult using a sequence of {mult_even,mult_odd}
10243 generate the following vectors:
10244 vect1: [res1,res3,res5,res7],
10245 vect2: [res2,res4,res6,res8].
10246
10247 When vectorizing outer-loops, we execute the inner-loop sequentially
10248 (each vectorized inner-loop iteration contributes to VF outer-loop
 10249	 iterations in parallel). We therefore don't allow changing the
 10250	 order of the computation in the inner-loop during outer-loop
 10251	 vectorization; a standalone sketch of the two orderings follows this function. */
10252 /* TODO: Another case in which order doesn't *really* matter is when we
10253 widen and then contract again, e.g. (short)((int)x * y >> 8).
10254 Normally, pack_trunc performs an even/odd permute, whereas the
10255 repack from an even/odd expansion would be an interleave, which
10256 would be significantly simpler for e.g. AVX2. */
10257 /* In any case, in order to avoid duplicating the code below, recurse
10258 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10259 are properly set up for the caller. If we fail, we'll continue with
10260 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10261 if (vect_loop
10262 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10263 && !nested_in_vect_loop_p (vect_loop, stmt)
10264 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10265 stmt, vectype_out, vectype_in,
a86ec597
RH
10266 code1, code2, multi_step_cvt,
10267 interm_types))
ebc047a2
CH
10268 {
10269 /* Elements in a vector with vect_used_by_reduction property cannot
10270 be reordered if the use chain with this property does not have the
10271 same operation. One such an example is s += a * b, where elements
10272 in a and b cannot be reordered. Here we check if the vector defined
10273 by STMT is only directly used in the reduction statement. */
10274 tree lhs = gimple_assign_lhs (stmt);
10275 use_operand_p dummy;
355fe088 10276 gimple *use_stmt;
ebc047a2
CH
10277 stmt_vec_info use_stmt_info = NULL;
10278 if (single_imm_use (lhs, &dummy, &use_stmt)
10279 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10280 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10281 return true;
10282 }
4a00c761
JJ
10283 c1 = VEC_WIDEN_MULT_LO_EXPR;
10284 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10285 break;
10286
81c40241
RB
10287 case DOT_PROD_EXPR:
10288 c1 = DOT_PROD_EXPR;
10289 c2 = DOT_PROD_EXPR;
10290 break;
10291
10292 case SAD_EXPR:
10293 c1 = SAD_EXPR;
10294 c2 = SAD_EXPR;
10295 break;
10296
6ae6116f
RH
10297 case VEC_WIDEN_MULT_EVEN_EXPR:
10298 /* Support the recursion induced just above. */
10299 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10300 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10301 break;
10302
36ba4aae 10303 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10304 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10305 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10306 break;
10307
ebfd146a 10308 CASE_CONVERT:
4a00c761
JJ
10309 c1 = VEC_UNPACK_LO_EXPR;
10310 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10311 break;
10312
10313 case FLOAT_EXPR:
4a00c761
JJ
10314 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10315 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10316 break;
10317
10318 case FIX_TRUNC_EXPR:
1bda738b
JJ
10319 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10320 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10321 break;
ebfd146a
IR
10322
10323 default:
10324 gcc_unreachable ();
10325 }
10326
6ae6116f 10327 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10328 std::swap (c1, c2);
4a00c761 10329
ebfd146a
IR
10330 if (code == FIX_TRUNC_EXPR)
10331 {
10332 /* The signedness is determined from output operand. */
b690cc0f
RG
10333 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10334 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10335 }
10336 else
10337 {
10338 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10339 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10340 }
10341
10342 if (!optab1 || !optab2)
10343 return false;
10344
10345 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10346 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10347 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10348 return false;
10349
4a00c761
JJ
10350 *code1 = c1;
10351 *code2 = c2;
10352
10353 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10354 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10355 /* For scalar masks we may have different boolean
10356 vector types having the same QImode. Thus we
10357 add additional check for elements number. */
10358 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10359 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10360 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10361
b8698a0f 10362 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10363 types. */
ebfd146a 10364
4a00c761
JJ
10365 prev_type = vectype;
10366 prev_mode = vec_mode;
b8698a0f 10367
4a00c761
JJ
10368 if (!CONVERT_EXPR_CODE_P (code))
10369 return false;
b8698a0f 10370
4a00c761
JJ
10371 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 10372	 intermediate steps in the promotion sequence. We try
 10373	 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10374 not. */
9771b263 10375 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10376 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10377 {
10378 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10379 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10380 {
7cfb4d93 10381 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10382 if (intermediate_mode != TYPE_MODE (intermediate_type))
10383 return false;
10384 }
10385 else
10386 intermediate_type
10387 = lang_hooks.types.type_for_mode (intermediate_mode,
10388 TYPE_UNSIGNED (prev_type));
10389
4a00c761
JJ
10390 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10391 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10392
10393 if (!optab3 || !optab4
10394 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10395 || insn_data[icode1].operand[0].mode != intermediate_mode
10396 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10397 || insn_data[icode2].operand[0].mode != intermediate_mode
10398 || ((icode1 = optab_handler (optab3, intermediate_mode))
10399 == CODE_FOR_nothing)
10400 || ((icode2 = optab_handler (optab4, intermediate_mode))
10401 == CODE_FOR_nothing))
10402 break;
ebfd146a 10403
9771b263 10404 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10405 (*multi_step_cvt)++;
10406
10407 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10408 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10409 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10410 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10411 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10412
10413 prev_type = intermediate_type;
10414 prev_mode = intermediate_mode;
ebfd146a
IR
10415 }
10416
9771b263 10417 interm_types->release ();
4a00c761 10418 return false;
ebfd146a
IR
10419}
10420
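
/* Illustrative sketch, not part of the GCC sources: the two result
   orderings discussed in supportable_widening_operation above, for a
   widening multiply of eight shorts into eight ints spread over two
   four-lane vectors.  The hi/lo scheme keeps the original order split
   across the two vectors; the even/odd scheme (e.g. Altivec
   mult_even/mult_odd) interleaves it, which is only acceptable when the
   results feed a reduction.  Helper name and array layout are made up
   for illustration.  */
static void
sketch_widen_mult_orderings (const short a[8], const short b[8],
			     int lo_hi[2][4], int even_odd[2][4])
{
  for (int i = 0; i < 8; i++)
    {
      int res = (int) a[i] * (int) b[i];
      /* vect1 = {res1..res4}, vect2 = {res5..res8}.  */
      lo_hi[i / 4][i % 4] = res;
      /* vect1 = {res1,res3,res5,res7}, vect2 = {res2,res4,res6,res8}.  */
      even_odd[i % 2][i / 2] = res;
    }
}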
10421
10422/* Function supportable_narrowing_operation
10423
b8698a0f
L
10424 Check whether an operation represented by the code CODE is a
10425 narrowing operation that is supported by the target platform in
b690cc0f
RG
10426 vector form (i.e., when operating on arguments of type VECTYPE_IN
10427 and producing a result of type VECTYPE_OUT).
b8698a0f 10428
1bda738b
JJ
10429 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10430 and FLOAT. This function checks if these operations are supported by
ebfd146a
IR
10431 the target platform directly via vector tree-codes.
10432
10433 Output:
b8698a0f
L
10434 - CODE1 is the code of a vector operation to be used when
10435 vectorizing the operation, if available.
ebfd146a
IR
10436 - MULTI_STEP_CVT determines the number of required intermediate steps in
10437 case of multi-step conversion (like int->short->char - in that case
10438 MULTI_STEP_CVT will be 1).
10439 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10440 narrowing operation (short in the above example). */
ebfd146a
IR
10441
10442bool
10443supportable_narrowing_operation (enum tree_code code,
b690cc0f 10444 tree vectype_out, tree vectype_in,
ebfd146a 10445 enum tree_code *code1, int *multi_step_cvt,
9771b263 10446 vec<tree> *interm_types)
ebfd146a 10447{
ef4bddc2 10448 machine_mode vec_mode;
ebfd146a
IR
10449 enum insn_code icode1;
10450 optab optab1, interm_optab;
b690cc0f
RG
10451 tree vectype = vectype_in;
10452 tree narrow_vectype = vectype_out;
ebfd146a 10453 enum tree_code c1;
3ae0661a 10454 tree intermediate_type, prev_type;
ef4bddc2 10455 machine_mode intermediate_mode, prev_mode;
ebfd146a 10456 int i;
4a00c761 10457 bool uns;
ebfd146a 10458
4a00c761 10459 *multi_step_cvt = 0;
ebfd146a
IR
10460 switch (code)
10461 {
10462 CASE_CONVERT:
10463 c1 = VEC_PACK_TRUNC_EXPR;
10464 break;
10465
10466 case FIX_TRUNC_EXPR:
10467 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10468 break;
10469
10470 case FLOAT_EXPR:
1bda738b
JJ
10471 c1 = VEC_PACK_FLOAT_EXPR;
10472 break;
ebfd146a
IR
10473
10474 default:
10475 gcc_unreachable ();
10476 }
10477
10478 if (code == FIX_TRUNC_EXPR)
10479 /* The signedness is determined from output operand. */
b690cc0f 10480 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10481 else
10482 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10483
10484 if (!optab1)
10485 return false;
10486
10487 vec_mode = TYPE_MODE (vectype);
947131ba 10488 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10489 return false;
10490
4a00c761
JJ
10491 *code1 = c1;
10492
10493 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10494 /* For scalar masks we may have different boolean
10495 vector types having the same QImode. Thus we
10496 add additional check for elements number. */
10497 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10498 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10499 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10500
1bda738b
JJ
10501 if (code == FLOAT_EXPR)
10502 return false;
10503
ebfd146a
IR
10504 /* Check if it's a multi-step conversion that can be done using intermediate
10505 types. */
4a00c761 10506 prev_mode = vec_mode;
3ae0661a 10507 prev_type = vectype;
4a00c761
JJ
10508 if (code == FIX_TRUNC_EXPR)
10509 uns = TYPE_UNSIGNED (vectype_out);
10510 else
10511 uns = TYPE_UNSIGNED (vectype);
10512
10513 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10514 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10515 costly than signed. */
10516 if (code == FIX_TRUNC_EXPR && uns)
10517 {
10518 enum insn_code icode2;
10519
10520 intermediate_type
10521 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10522 interm_optab
10523 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10524 if (interm_optab != unknown_optab
4a00c761
JJ
10525 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10526 && insn_data[icode1].operand[0].mode
10527 == insn_data[icode2].operand[0].mode)
10528 {
10529 uns = false;
10530 optab1 = interm_optab;
10531 icode1 = icode2;
10532 }
10533 }
ebfd146a 10534
4a00c761
JJ
10535 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 10536	 intermediate steps in the demotion sequence. We try
10537 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10538 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10539 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10540 {
10541 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10542 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10543 {
7cfb4d93 10544 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10545 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10546 return false;
3ae0661a
IE
10547 }
10548 else
10549 intermediate_type
10550 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10551 interm_optab
10552 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10553 optab_default);
10554 if (!interm_optab
10555 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10556 || insn_data[icode1].operand[0].mode != intermediate_mode
10557 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10558 == CODE_FOR_nothing))
10559 break;
10560
9771b263 10561 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10562 (*multi_step_cvt)++;
10563
10564 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10565 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10566 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10567 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10568
10569 prev_mode = intermediate_mode;
3ae0661a 10570 prev_type = intermediate_type;
4a00c761 10571 optab1 = interm_optab;
ebfd146a
IR
10572 }
10573
9771b263 10574 interm_types->release ();
4a00c761 10575 return false;
ebfd146a 10576}
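
/* Illustrative sketch, not part of the GCC sources: the effect of the
   two-step narrowing int -> short -> char described above, where
   MULTI_STEP_CVT would be 1 and the intermediate type is short.  Each
   VEC_PACK_TRUNC_EXPR step halves the element width and packs two input
   vectors into one; the scalar equivalent is just two truncations.
   Helper name is made up for illustration.  */
static void
sketch_two_step_narrow (const int src[8], signed char dst[8])
{
  short tmp[8];
  for (int i = 0; i < 8; i++)
    tmp[i] = (short) src[i];        /* First pack_trunc step: int -> short.  */
  for (int i = 0; i < 8; i++)
    dst[i] = (signed char) tmp[i];  /* Second pack_trunc step: short -> char.  */
}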
7cfb4d93
RS
10577
10578/* Generate and return a statement that sets vector mask MASK such that
10579 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10580
10581gcall *
10582vect_gen_while (tree mask, tree start_index, tree end_index)
10583{
10584 tree cmp_type = TREE_TYPE (start_index);
10585 tree mask_type = TREE_TYPE (mask);
10586 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10587 cmp_type, mask_type,
10588 OPTIMIZE_FOR_SPEED));
10589 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10590 start_index, end_index,
10591 build_zero_cst (mask_type));
10592 gimple_call_set_lhs (call, mask);
10593 return call;
10594}
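
/* Illustrative sketch, not part of the GCC sources: the scalar semantics
   that the IFN_WHILE_ULT call built by vect_gen_while encodes.  Lane I of
   the mask is true iff START_INDEX + I < END_INDEX, so the mask is an
   all-true prefix followed by an all-false suffix.  Helper name is made
   up for illustration.  */
static void
sketch_while_ult (unsigned char *mask, unsigned int nunits,
		  unsigned long start_index, unsigned long end_index)
{
  for (unsigned int i = 0; i < nunits; i++)
    mask[i] = (start_index + i < end_index) ? 1 : 0;
}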
535e7c11
RS
10595
10596/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10597 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10598
10599tree
10600vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10601 tree end_index)
10602{
10603 tree tmp = make_ssa_name (mask_type);
10604 gcall *call = vect_gen_while (tmp, start_index, end_index);
10605 gimple_seq_add_stmt (seq, call);
10606 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10607}
1f3cb663
RS
10608
10609/* Try to compute the vector types required to vectorize STMT_INFO,
10610 returning true on success and false if vectorization isn't possible.
10611
10612 On success:
10613
10614 - Set *STMT_VECTYPE_OUT to:
10615 - NULL_TREE if the statement doesn't need to be vectorized;
10616 - boolean_type_node if the statement is a boolean operation whose
10617 vector type can only be determined once all the other vector types
10618 are known; and
10619 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10620
10621 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10622 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10623 statement does not help to determine the overall number of units. */
10624
10625bool
10626vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10627 tree *stmt_vectype_out,
10628 tree *nunits_vectype_out)
10629{
10630 gimple *stmt = stmt_info->stmt;
10631
10632 *stmt_vectype_out = NULL_TREE;
10633 *nunits_vectype_out = NULL_TREE;
10634
10635 if (gimple_get_lhs (stmt) == NULL_TREE
10636 /* MASK_STORE has no lhs, but is ok. */
10637 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10638 {
10639 if (is_a <gcall *> (stmt))
10640 {
10641 /* Ignore calls with no lhs. These must be calls to
10642 #pragma omp simd functions, and what vectorization factor
10643 it really needs can't be determined until
10644 vectorizable_simd_clone_call. */
10645 if (dump_enabled_p ())
10646 dump_printf_loc (MSG_NOTE, vect_location,
10647 "defer to SIMD clone analysis.\n");
10648 return true;
10649 }
10650
10651 if (dump_enabled_p ())
10652 {
10653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10654 "not vectorized: irregular stmt.");
10655 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10656 }
10657 return false;
10658 }
10659
10660 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10661 {
10662 if (dump_enabled_p ())
10663 {
10664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10665 "not vectorized: vector stmt in loop:");
10666 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10667 }
10668 return false;
10669 }
10670
10671 tree vectype;
10672 tree scalar_type = NULL_TREE;
10673 if (STMT_VINFO_VECTYPE (stmt_info))
10674 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10675 else
10676 {
10677 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10678 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10679 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10680 else
10681 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10682
10683 /* Pure bool ops don't participate in number-of-units computation.
10684 For comparisons use the types being compared. */
10685 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10686 && is_gimple_assign (stmt)
10687 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10688 {
10689 *stmt_vectype_out = boolean_type_node;
10690
10691 tree rhs1 = gimple_assign_rhs1 (stmt);
10692 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10693 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10694 scalar_type = TREE_TYPE (rhs1);
10695 else
10696 {
10697 if (dump_enabled_p ())
10698 dump_printf_loc (MSG_NOTE, vect_location,
10699 "pure bool operation.\n");
10700 return true;
10701 }
10702 }
10703
10704 if (dump_enabled_p ())
10705 {
10706 dump_printf_loc (MSG_NOTE, vect_location,
10707 "get vectype for scalar type: ");
10708 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10709 dump_printf (MSG_NOTE, "\n");
10710 }
10711 vectype = get_vectype_for_scalar_type (scalar_type);
10712 if (!vectype)
10713 {
10714 if (dump_enabled_p ())
10715 {
10716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10717 "not vectorized: unsupported data-type ");
10718 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10719 scalar_type);
10720 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10721 }
10722 return false;
10723 }
10724
10725 if (!*stmt_vectype_out)
10726 *stmt_vectype_out = vectype;
10727
10728 if (dump_enabled_p ())
10729 {
10730 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10731 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10732 dump_printf (MSG_NOTE, "\n");
10733 }
10734 }
10735
10736 /* Don't try to compute scalar types if the stmt produces a boolean
10737 vector; use the existing vector type instead. */
10738 tree nunits_vectype;
10739 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10740 nunits_vectype = vectype;
10741 else
10742 {
10743 /* The number of units is set according to the smallest scalar
10744 type (or the largest vector size, but we only support one
10745 vector size per vectorization). */
10746 if (*stmt_vectype_out != boolean_type_node)
10747 {
10748 HOST_WIDE_INT dummy;
10749 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10750 }
10751 if (dump_enabled_p ())
10752 {
10753 dump_printf_loc (MSG_NOTE, vect_location,
10754 "get vectype for scalar type: ");
10755 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10756 dump_printf (MSG_NOTE, "\n");
10757 }
10758 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10759 }
10760 if (!nunits_vectype)
10761 {
10762 if (dump_enabled_p ())
10763 {
10764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10765 "not vectorized: unsupported data-type ");
10766 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10767 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10768 }
10769 return false;
10770 }
10771
10772 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10773 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10774 {
10775 if (dump_enabled_p ())
10776 {
10777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10778 "not vectorized: different sized vector "
10779 "types in statement, ");
10780 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10781 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10782 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10783 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10784 }
10785 return false;
10786 }
10787
10788 if (dump_enabled_p ())
10789 {
10790 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10791 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10792 dump_printf (MSG_NOTE, "\n");
10793
10794 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10795 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10796 dump_printf (MSG_NOTE, "\n");
10797 }
10798
10799 *nunits_vectype_out = nunits_vectype;
10800 return true;
10801}
10802
10803/* Try to determine the correct vector type for STMT_INFO, which is a
10804 statement that produces a scalar boolean result. Return the vector
10805 type on success, otherwise return NULL_TREE. */
10806
10807tree
10808vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10809{
10810 gimple *stmt = stmt_info->stmt;
10811 tree mask_type = NULL;
10812 tree vectype, scalar_type;
10813
10814 if (is_gimple_assign (stmt)
10815 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10816 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10817 {
10818 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10819 mask_type = get_mask_type_for_scalar_type (scalar_type);
10820
10821 if (!mask_type)
10822 {
10823 if (dump_enabled_p ())
10824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10825 "not vectorized: unsupported mask\n");
10826 return NULL_TREE;
10827 }
10828 }
10829 else
10830 {
10831 tree rhs;
10832 ssa_op_iter iter;
1f3cb663
RS
10833 enum vect_def_type dt;
10834
10835 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10836 {
894dd753 10837 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
1f3cb663
RS
10838 {
10839 if (dump_enabled_p ())
10840 {
10841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10842 "not vectorized: can't compute mask type "
10843 "for statement, ");
10844 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10845 0);
10846 }
10847 return NULL_TREE;
10848 }
10849
10850 /* No vectype probably means external definition.
 10851	 Allow it in case there is another operand which
 10852	 allows us to determine the mask type. */
10853 if (!vectype)
10854 continue;
10855
10856 if (!mask_type)
10857 mask_type = vectype;
10858 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10859 TYPE_VECTOR_SUBPARTS (vectype)))
10860 {
10861 if (dump_enabled_p ())
10862 {
10863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10864 "not vectorized: different sized masks "
10865 "types in statement, ");
10866 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10867 mask_type);
10868 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10869 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10870 vectype);
10871 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10872 }
10873 return NULL_TREE;
10874 }
10875 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10876 != VECTOR_BOOLEAN_TYPE_P (vectype))
10877 {
10878 if (dump_enabled_p ())
10879 {
10880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10881 "not vectorized: mixed mask and "
10882 "nonmask vector types in statement, ");
10883 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10884 mask_type);
10885 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10886 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10887 vectype);
10888 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10889 }
10890 return NULL_TREE;
10891 }
10892 }
10893
 10894	 /* We may compare a boolean value loaded as a vector of integers.
 10895	 Fix mask_type in such a case. */
10896 if (mask_type
10897 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10898 && gimple_code (stmt) == GIMPLE_ASSIGN
10899 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10900 mask_type = build_same_sized_truth_vector_type (mask_type);
10901 }
10902
 10903	 /* A missing mask_type should mean a loop-invariant predicate.
10904 This is probably a subject for optimization in if-conversion. */
10905 if (!mask_type && dump_enabled_p ())
10906 {
10907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10908 "not vectorized: can't compute mask type "
10909 "for statement, ");
10910 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10911 }
10912 return mask_type;
10913}