/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (stmt, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (stmt, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (stmt, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &def_stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
          case vect_reduction_def:
            gcc_assert (relevant != vect_unused_in_scope);
            if (relevant != vect_unused_in_scope
                && relevant != vect_used_in_scope
                && relevant != vect_used_by_reduction
                && relevant != vect_used_only_live)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of reduction.\n");
                return false;
              }
            break;

          case vect_nested_cycle:
            if (relevant != vect_unused_in_scope
                && relevant != vect_used_in_outer_by_reduction
                && relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }
            break;

          case vect_double_reduction_def:
            if (relevant != vect_unused_in_scope
                && relevant != vect_used_by_reduction
                && relevant != vect_used_only_live)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
            gcc_unreachable ();
          if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
                               unsigned opno, enum vect_def_type dt,
                               stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
      /* ??? We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
                                 opno))
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          /* ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          prologue_cost += record_stmt_cost
              (cost_vec, 1,
               dt == vect_external_def
               ? (elt ? scalar_to_vec : vec_construct)
               : vector_load,
               stmt_info, 0, vect_prologue);
          nelt = 0;
        }
    }

  return prologue_cost;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
         ??? This over-estimates cost for multiple uses and should be
         re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
        {
          tree op = gimple_op (stmt, i);
          enum vect_def_type dt;
          if (!op || op == lhs)
            continue;
          if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
              && (dt == vect_constant_def || dt == vect_external_def))
            prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
                                                            i, dt, cost_vec);
        }
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr,
                                    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
                                       vec_promote_demote, stmt_info, 0,
                                       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
        prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
                                                        1, dt, cost_vec);
      else
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
                                    slp_vf, instance, true,
                                    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
           i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
        bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
        {
          if (i % assumed_nunits == 0)
            {
              if (load_seen)
                ncopies++;
              load_seen = false;
            }
          if (bitmap_bit_p (perm, i))
            load_seen = true;
        }
      if (load_seen)
        ncopies++;
      gcc_assert (ncopies
                  <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
                      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


c83a894c
AH
1537/* Function vect_get_vec_def_for_operand.
1538
1539 OP is an operand in STMT. This function returns a (vector) def that will be
1540 used in the vectorized stmt for STMT.
1541
1542 In the case that OP is an SSA_NAME which is defined in the loop, then
1543 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1544
1545 In case OP is an invariant or constant, a new stmt that creates a vector def
1546 needs to be introduced. VECTYPE may be used to specify a required type for
1547 the vector invariant. */
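/* Illustrative sketch (assumed values, not part of the original source):
   for a constant operand such as OP == 3 with a four-element integer
   VECTYPE, the call

       tree vecdef = vect_get_vec_def_for_operand (op, stmt, vectype);

   goes through vect_init_vector, which emits a loop-invariant splat along
   the lines of "cst_1 = { 3, 3, 3, 3 };" and returns the new SSA name
   (the "cst_" prefix comes from vect_init_vector above).  The temporary
   name and exact insertion point are illustrative only.  */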
1548
1549tree
1550vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1551{
1552 gimple *def_stmt;
1553 enum vect_def_type dt;
1554 bool is_simple_use;
1555 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1557
1558 if (dump_enabled_p ())
1559 {
1560 dump_printf_loc (MSG_NOTE, vect_location,
1561 "vect_get_vec_def_for_operand: ");
1562 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1563 dump_printf (MSG_NOTE, "\n");
1564 }
1565
894dd753 1566 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, &def_stmt);
c83a894c
AH
1567 gcc_assert (is_simple_use);
1568 if (def_stmt && dump_enabled_p ())
1569 {
1570 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1572 }
1573
1574 if (dt == vect_constant_def || dt == vect_external_def)
1575 {
1576 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1577 tree vector_type;
1578
1579 if (vectype)
1580 vector_type = vectype;
2568d8a1 1581 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1582 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1583 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1584 else
1585 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1586
1587 gcc_assert (vector_type);
1588 return vect_init_vector (stmt, op, vector_type, NULL);
1589 }
1590 else
1591 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1592}
1593
1594
ebfd146a
IR
1595/* Function vect_get_vec_def_for_stmt_copy
1596
ff802fa1 1597 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1598 vectorized stmt to be created (by the caller to this function) is a "copy"
1599 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1600 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1601 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1602 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1603 DT is the type of the vector def VEC_OPRND.
1604
1605 Context:
1606 In case the vectorization factor (VF) is bigger than the number
1607 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1608 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1609 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1610 smallest data-type determines the VF, and as a result, when vectorizing
1611 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1612 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1613 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1614 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1615 which VF=16 and nunits=4, so the number of copies required is 4):
1616
1617 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1618
ebfd146a
IR
1619 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1620 VS1.1: vx.1 = memref1 VS1.2
1621 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1622 VS1.3: vx.3 = memref3
ebfd146a
IR
1623
1624 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1625 VSnew.1: vz1 = vx.1 + ... VSnew.2
1626 VSnew.2: vz2 = vx.2 + ... VSnew.3
1627 VSnew.3: vz3 = vx.3 + ...
1628
1629 The vectorization of S1 is explained in vectorizable_load.
1630 The vectorization of S2:
b8698a0f
L
1631 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1632 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1633 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1634 returns the vector-def 'vx.0'.
1635
b8698a0f
L
1636 To create the remaining copies of the vector-stmt (VSnew.j), this
1637 function is called to get the relevant vector-def for each operand. It is
1638 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1639 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1640
b8698a0f
L
1641 For example, to obtain the vector-def 'vx.1' in order to create the
1642 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1643 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1644 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1645 and return its def ('vx.1').
1646 Overall, to create the above sequence this function will be called 3 times:
1647 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1648 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1649 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
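/* A minimal caller sketch (illustrative; OP, DT and NCOPIES stand for the
   caller's operand, its def type and its number of vector copies).  The
   first copy of an operand comes from vect_get_vec_def_for_operand and
   every further copy is chained through this function, mirroring the
   pattern used by the vectorizable_* routines later in this file:

       tree vec_oprnd = NULL_TREE;
       for (unsigned j = 0; j < ncopies; j++)
	 {
	   if (j == 0)
	     vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
	   else
	     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
	   ... build copy J of the vector stmt from VEC_OPRND ...
	 }  */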
1650
1651tree
1652vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1653{
355fe088 1654 gimple *vec_stmt_for_operand;
ebfd146a
IR
1655 stmt_vec_info def_stmt_info;
1656
1657 /* Do nothing; can reuse same def. */
8644a673 1658 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1659 return vec_oprnd;
1660
1661 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1662 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1663 gcc_assert (def_stmt_info);
1664 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1665 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1666 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1667 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1668 else
1669 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1670 return vec_oprnd;
1671}
1672
1673
1674/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1675 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1676
c78e3652 1677void
b8698a0f 1678vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1679 vec<tree> *vec_oprnds0,
1680 vec<tree> *vec_oprnds1)
ebfd146a 1681{
9771b263 1682 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1683
1684 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1685 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1686
9771b263 1687 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1688 {
9771b263 1689 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1690 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1691 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1692 }
1693}
1694
1695
c78e3652 1696/* Get vectorized definitions for OP0 and OP1, either from SLP_NODE or by creating a single vector def for each operand. */
ebfd146a 1697
c78e3652 1698void
355fe088 1699vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1700 vec<tree> *vec_oprnds0,
1701 vec<tree> *vec_oprnds1,
306b0c92 1702 slp_tree slp_node)
ebfd146a
IR
1703{
1704 if (slp_node)
d092494c
IR
1705 {
1706 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1707 auto_vec<tree> ops (nops);
1708 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1709
9771b263 1710 ops.quick_push (op0);
d092494c 1711 if (op1)
9771b263 1712 ops.quick_push (op1);
d092494c 1713
306b0c92 1714 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1715
37b5ec8f 1716 *vec_oprnds0 = vec_defs[0];
d092494c 1717 if (op1)
37b5ec8f 1718 *vec_oprnds1 = vec_defs[1];
d092494c 1719 }
ebfd146a
IR
1720 else
1721 {
1722 tree vec_oprnd;
1723
9771b263 1724 vec_oprnds0->create (1);
81c40241 1725 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1726 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1727
1728 if (op1)
1729 {
9771b263 1730 vec_oprnds1->create (1);
81c40241 1731 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1732 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1733 }
1734 }
1735}
1736
bb6c2b68
RS
1737/* Helper function called by vect_finish_replace_stmt and
1738 vect_finish_stmt_generation. Set the location of the new
1739 statement and create a stmt_vec_info for it. */
1740
1741static void
1742vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1743{
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1746
4fbeb363 1747 vinfo->add_stmt (vec_stmt);
bb6c2b68
RS
1748
1749 if (dump_enabled_p ())
1750 {
1751 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1753 }
1754
1755 gimple_set_location (vec_stmt, gimple_location (stmt));
1756
1757 /* While EH edges will generally prevent vectorization, stmt might
1758 e.g. be in a must-not-throw region. Ensure newly created stmts
1759 that could throw are part of the same region. */
1760 int lp_nr = lookup_stmt_eh_lp (stmt);
1761 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1762 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1763}
1764
1765/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1766 which sets the same scalar result as STMT did. */
1767
1768void
1769vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1770{
1771 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1772
1773 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1774 gsi_replace (&gsi, vec_stmt, false);
1775
1776 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1777}
ebfd146a
IR
1778
1779/* Function vect_finish_stmt_generation.
1780
1781 Insert the new vector stmt VEC_STMT for scalar stmt STMT before GSI. */
1782
1783void
355fe088 1784vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1785 gimple_stmt_iterator *gsi)
1786{
ebfd146a
IR
1787 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1788
54e8e2c3
RG
1789 if (!gsi_end_p (*gsi)
1790 && gimple_has_mem_ops (vec_stmt))
1791 {
355fe088 1792 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1793 tree vuse = gimple_vuse (at_stmt);
1794 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1795 {
1796 tree vdef = gimple_vdef (at_stmt);
1797 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1798 /* If we have an SSA vuse and insert a store, update virtual
1799 SSA form to avoid triggering the renamer. Do so only
1800 if we can easily see all uses - which is what almost always
1801 happens with the way vectorized stmts are inserted. */
1802 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1803 && ((is_gimple_assign (vec_stmt)
1804 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1805 || (is_gimple_call (vec_stmt)
1806 && !(gimple_call_flags (vec_stmt)
1807 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1808 {
1809 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1810 gimple_set_vdef (vec_stmt, new_vdef);
1811 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1812 }
1813 }
1814 }
ebfd146a 1815 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1816 vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1817}
1818
70439f0d
RS
1819/* We want to vectorize a call to combined function CFN with function
1820 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1821 as the types of all inputs. Check whether this is possible using
1822 an internal function, returning its code if so or IFN_LAST if not. */
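/* For instance (a hedged illustration, not taken from the original
   source): a call to sqrt corresponds to the combined function CFN_SQRT
   and hence to the internal function IFN_SQRT; the code below returns
   IFN_SQRT only if direct_internal_fn_supported_p reports that the target
   can perform it on the given vector types, and IFN_LAST otherwise.  */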
ebfd146a 1823
70439f0d
RS
1824static internal_fn
1825vectorizable_internal_function (combined_fn cfn, tree fndecl,
1826 tree vectype_out, tree vectype_in)
ebfd146a 1827{
70439f0d
RS
1828 internal_fn ifn;
1829 if (internal_fn_p (cfn))
1830 ifn = as_internal_fn (cfn);
1831 else
1832 ifn = associated_internal_fn (fndecl);
1833 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1834 {
1835 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1836 if (info.vectorizable)
1837 {
1838 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1839 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1840 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1841 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1842 return ifn;
1843 }
1844 }
1845 return IFN_LAST;
ebfd146a
IR
1846}
1847
5ce9450f 1848
355fe088 1849static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1850 gimple_stmt_iterator *);
1851
7cfb4d93
RS
1852/* Check whether a load or store statement in the loop described by
1853 LOOP_VINFO is possible in a fully-masked loop. This is testing
1854 whether the vectorizer pass has the appropriate support, as well as
1855 whether the target does.
1856
1857 VLS_TYPE says whether the statement is a load or store and VECTYPE
1858 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1859 says how the load or store is going to be implemented and GROUP_SIZE
1860 is the number of load or store statements in the containing group.
bfaa08b7
RS
1861 If the access is a gather load or scatter store, GS_INFO describes
1862 its arguments.
7cfb4d93
RS
1863
1864 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1865 supported, otherwise record the required mask types. */
1866
1867static void
1868check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1869 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1870 vect_memory_access_type memory_access_type,
1871 gather_scatter_info *gs_info)
7cfb4d93
RS
1872{
1873 /* Invariant loads need no special support. */
1874 if (memory_access_type == VMAT_INVARIANT)
1875 return;
1876
1877 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1878 machine_mode vecmode = TYPE_MODE (vectype);
1879 bool is_load = (vls_type == VLS_LOAD);
1880 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1881 {
1882 if (is_load
1883 ? !vect_load_lanes_supported (vectype, group_size, true)
1884 : !vect_store_lanes_supported (vectype, group_size, true))
1885 {
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "can't use a fully-masked loop because the"
1889 " target doesn't have an appropriate masked"
1890 " load/store-lanes instruction.\n");
1891 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1892 return;
1893 }
1894 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1895 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1896 return;
1897 }
1898
bfaa08b7
RS
1899 if (memory_access_type == VMAT_GATHER_SCATTER)
1900 {
f307441a
RS
1901 internal_fn ifn = (is_load
1902 ? IFN_MASK_GATHER_LOAD
1903 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1904 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1905 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1906 gs_info->memory_type,
1907 TYPE_SIGN (offset_type),
1908 gs_info->scale))
1909 {
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "can't use a fully-masked loop because the"
1913 " target doesn't have an appropriate masked"
f307441a 1914 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1915 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1916 return;
1917 }
1918 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1919 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1920 return;
1921 }
1922
7cfb4d93
RS
1923 if (memory_access_type != VMAT_CONTIGUOUS
1924 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1925 {
1926 /* Element X of the data must come from iteration i * VF + X of the
1927 scalar loop. We need more work to support other mappings. */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1930 "can't use a fully-masked loop because an access"
1931 " isn't contiguous.\n");
1932 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1933 return;
1934 }
1935
1936 machine_mode mask_mode;
1937 if (!(targetm.vectorize.get_mask_mode
1938 (GET_MODE_NUNITS (vecmode),
1939 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1940 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1941 {
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "can't use a fully-masked loop because the target"
1945 " doesn't have the appropriate masked load or"
1946 " store.\n");
1947 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1948 return;
1949 }
1950 /* We might load more scalars than we need for permuting SLP loads.
1951 We checked in get_group_load_store_type that the extra elements
1952 don't leak into a new vector. */
1953 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1954 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1955 unsigned int nvectors;
1956 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1957 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1958 else
1959 gcc_unreachable ();
1960}
1961
1962/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1963 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1964 that needs to be applied to all loads and stores in a vectorized loop.
1965 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1966
1967 MASK_TYPE is the type of both masks. If new statements are needed,
1968 insert them before GSI. */
1969
1970static tree
1971prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1972 gimple_stmt_iterator *gsi)
1973{
1974 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1975 if (!loop_mask)
1976 return vec_mask;
1977
1978 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1979 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1980 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1981 vec_mask, loop_mask);
1982 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1983 return and_res;
1984}
1985
429ef523
RS
1986/* Determine whether we can use a gather load or scatter store to vectorize
1987 strided load or store STMT by truncating the current offset to a smaller
1988 width. We need to be able to construct an offset vector:
1989
1990 { 0, X, X*2, X*3, ... }
1991
1992 without loss of precision, where X is STMT's DR_STEP.
1993
1994 Return true if this is possible, describing the gather load or scatter
1995 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
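/* Worked example (a sketch with assumed values, not from the original
   source): suppose DR_STEP is 4 bytes, the vector elements are 32 bits
   wide and the loop runs at most 1000 scalar iterations.  Trying
   SCALE == 4 gives a per-element factor of DR_STEP / SCALE == 1, so the
   largest offset needed is roughly 999, which needs far fewer than
   32 bits; a truncated 32-bit offset vector { 0, 1, 2, 3, ... } with
   scale 4 is therefore usable, provided the target supports such a
   gather or scatter.  */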
1996
1997static bool
1998vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1999 bool masked_p,
2000 gather_scatter_info *gs_info)
2001{
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2004 tree step = DR_STEP (dr);
2005 if (TREE_CODE (step) != INTEGER_CST)
2006 {
2007 /* ??? Perhaps we could use range information here? */
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_NOTE, vect_location,
2010 "cannot truncate variable step.\n");
2011 return false;
2012 }
2013
2014 /* Get the number of bits in an element. */
2015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2016 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2017 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2018
2019 /* Set COUNT to the upper limit on the number of elements - 1.
2020 Start with the maximum vectorization factor. */
2021 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2022
2023 /* Try lowering COUNT to the number of scalar latch iterations. */
2024 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2025 widest_int max_iters;
2026 if (max_loop_iterations (loop, &max_iters)
2027 && max_iters < count)
2028 count = max_iters.to_shwi ();
2029
2030 /* Try scales of 1 and the element size. */
2031 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
4a669ac3 2032 wi::overflow_type overflow = wi::OVF_NONE;
429ef523
RS
2033 for (int i = 0; i < 2; ++i)
2034 {
2035 int scale = scales[i];
2036 widest_int factor;
2037 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2038 continue;
2039
2040 /* See whether we can calculate COUNT * STEP / SCALE
2041 without exceeding ELEMENT_BITS bits. */
4a669ac3
AH
2042 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2043 if (overflow)
429ef523
RS
2044 continue;
2045 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2046 if (wi::min_precision (range, sign) > element_bits)
2047 {
4a669ac3 2048 overflow = wi::OVF_UNKNOWN;
429ef523
RS
2049 continue;
2050 }
2051
2052 /* See whether the target supports the operation. */
2053 tree memory_type = TREE_TYPE (DR_REF (dr));
2054 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2055 memory_type, element_bits, sign, scale,
2056 &gs_info->ifn, &gs_info->element_type))
2057 continue;
2058
2059 tree offset_type = build_nonstandard_integer_type (element_bits,
2060 sign == UNSIGNED);
2061
2062 gs_info->decl = NULL_TREE;
2063 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2064 but we don't need to store that here. */
2065 gs_info->base = NULL_TREE;
2066 gs_info->offset = fold_convert (offset_type, step);
929b4411 2067 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2068 gs_info->offset_vectype = NULL_TREE;
2069 gs_info->scale = scale;
2070 gs_info->memory_type = memory_type;
2071 return true;
2072 }
2073
4a669ac3 2074 if (overflow && dump_enabled_p ())
429ef523
RS
2075 dump_printf_loc (MSG_NOTE, vect_location,
2076 "truncating gather/scatter offset to %d bits"
2077 " might change its value.\n", element_bits);
2078
2079 return false;
2080}
2081
ab2fc782
RS
2082/* Return true if we can use gather/scatter internal functions to
2083 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
2084 MASKED_P is true if the load or store is conditional. When returning
2085 true, fill in GS_INFO with the information required to perform the
2086 operation. */
ab2fc782
RS
2087
2088static bool
2089vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2090 bool masked_p,
ab2fc782
RS
2091 gather_scatter_info *gs_info)
2092{
2093 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2094 || gs_info->decl)
429ef523
RS
2095 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2096 masked_p, gs_info);
ab2fc782
RS
2097
2098 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2099 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2100 tree offset_type = TREE_TYPE (gs_info->offset);
2101 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2102
2103 /* Enforced by vect_check_gather_scatter. */
2104 gcc_assert (element_bits >= offset_bits);
2105
2106 /* If the elements are wider than the offset, convert the offset to the
2107 same width, without changing its sign. */
2108 if (element_bits > offset_bits)
2109 {
2110 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2111 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2112 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2113 }
2114
2115 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_NOTE, vect_location,
2117 "using gather/scatter for strided/grouped access,"
2118 " scale = %d\n", gs_info->scale);
2119
2120 return true;
2121}
2122
62da9e14
RS
2123/* STMT is a non-strided load or store, meaning that it accesses
2124 elements with a known constant step. Return -1 if that step
2125 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2126
2127static int
2128compare_step_with_zero (gimple *stmt)
2129{
2130 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2131 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2132 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2133 size_zero_node);
62da9e14
RS
2134}
2135
2136/* If the target supports a permute mask that reverses the elements in
2137 a vector of type VECTYPE, return that mask, otherwise return null. */
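/* For example (illustration only): reversing a four-element vector needs
   the permutation { 3, 2, 1, 0 }.  The builder below records only the
   leading elements NUNITS-1, NUNITS-2 and NUNITS-3 as a single stepped
   pattern, and vec_perm_indices extends that pattern to the full
   (possibly variable-length) vector.  */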
2138
2139static tree
2140perm_mask_for_reverse (tree vectype)
2141{
928686b1 2142 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2143
d980067b
RS
2144 /* The encoding has a single stepped pattern. */
2145 vec_perm_builder sel (nunits, 1, 3);
928686b1 2146 for (int i = 0; i < 3; ++i)
908a1a16 2147 sel.quick_push (nunits - 1 - i);
62da9e14 2148
e3342de4
RS
2149 vec_perm_indices indices (sel, 1, nunits);
2150 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2151 return NULL_TREE;
e3342de4 2152 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2153}
5ce9450f 2154
c3a8f964
RS
2155/* STMT is either a masked or unconditional store. Return the value
2156 being stored. */
2157
f307441a 2158tree
c3a8f964
RS
2159vect_get_store_rhs (gimple *stmt)
2160{
2161 if (gassign *assign = dyn_cast <gassign *> (stmt))
2162 {
2163 gcc_assert (gimple_assign_single_p (assign));
2164 return gimple_assign_rhs1 (assign);
2165 }
2166 if (gcall *call = dyn_cast <gcall *> (stmt))
2167 {
2168 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2169 int index = internal_fn_stored_value_index (ifn);
2170 gcc_assert (index >= 0);
2171 return gimple_call_arg (stmt, index);
c3a8f964
RS
2172 }
2173 gcc_unreachable ();
2174}
2175
2de001ee
RS
2176/* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT is part of a grouped load
2178 or store.
2179
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2184
2185static bool
2186get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2187 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2188 vect_memory_access_type *memory_access_type,
2189 gather_scatter_info *gs_info)
2de001ee
RS
2190{
2191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2192 vec_info *vinfo = stmt_info->vinfo;
2193 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2194 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2c53b149 2195 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2196 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 2197 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2de001ee 2198 bool single_element_p = (stmt == first_stmt
2c53b149
RB
2199 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2200 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2201 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2202
2203 /* True if the vectorized statements would access beyond the last
2204 statement in the group. */
2205 bool overrun_p = false;
2206
2207 /* True if we can cope with such overrun by peeling for gaps, so that
2208 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2209 bool can_overrun_p = (!masked_p
2210 && vls_type == VLS_LOAD
2211 && loop_vinfo
2212 && !loop->inner);
2de001ee
RS
2213
2214 /* There can only be a gap at the end of the group if the stride is
2215 known at compile time. */
2216 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2217
2218 /* Stores can't yet have gaps. */
2219 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2220
2221 if (slp)
2222 {
2223 if (STMT_VINFO_STRIDED_P (stmt_info))
2224 {
2c53b149 2225 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2226 separated by the stride, until we have a complete vector.
2227 Fall back to scalar accesses if that isn't possible. */
928686b1 2228 if (multiple_p (nunits, group_size))
2de001ee
RS
2229 *memory_access_type = VMAT_STRIDED_SLP;
2230 else
2231 *memory_access_type = VMAT_ELEMENTWISE;
2232 }
2233 else
2234 {
2235 overrun_p = loop_vinfo && gap != 0;
2236 if (overrun_p && vls_type != VLS_LOAD)
2237 {
2238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2239 "Grouped store with gaps requires"
2240 " non-consecutive accesses\n");
2241 return false;
2242 }
f702e7d4
RS
2243 /* An overrun is fine if the trailing elements are smaller
2244 than the alignment boundary B. Every vector access will
2245 be a multiple of B and so we are guaranteed to access a
2246 non-gap element in the same B-sized block. */
f9ef2c76 2247 if (overrun_p
f702e7d4
RS
2248 && gap < (vect_known_alignment_in_bytes (first_dr)
2249 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2250 overrun_p = false;
2de001ee
RS
2251 if (overrun_p && !can_overrun_p)
2252 {
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "Peeling for outer loop is not supported\n");
2256 return false;
2257 }
2258 *memory_access_type = VMAT_CONTIGUOUS;
2259 }
2260 }
2261 else
2262 {
2263 /* We can always handle this case using elementwise accesses,
2264 but see if something more efficient is available. */
2265 *memory_access_type = VMAT_ELEMENTWISE;
2266
2267 /* If there is a gap at the end of the group then these optimizations
2268 would access excess elements in the last iteration. */
2269 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2270 /* An overrun is fine if the trailing elements are smaller than the
2271 alignment boundary B. Every vector access will be a multiple of B
2272 and so we are guaranteed to access a non-gap element in the
2273 same B-sized block. */
f9ef2c76 2274 if (would_overrun_p
7e11fc7f 2275 && !masked_p
f702e7d4
RS
2276 && gap < (vect_known_alignment_in_bytes (first_dr)
2277 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2278 would_overrun_p = false;
f702e7d4 2279
2de001ee 2280 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2281 && (can_overrun_p || !would_overrun_p)
2282 && compare_step_with_zero (stmt) > 0)
2de001ee 2283 {
6737facb
RS
2284 /* First cope with the degenerate case of a single-element
2285 vector. */
2286 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2287 *memory_access_type = VMAT_CONTIGUOUS;
2288
2289 /* Otherwise try using LOAD/STORE_LANES. */
2290 if (*memory_access_type == VMAT_ELEMENTWISE
2291 && (vls_type == VLS_LOAD
7e11fc7f
RS
2292 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2293 : vect_store_lanes_supported (vectype, group_size,
2294 masked_p)))
2de001ee
RS
2295 {
2296 *memory_access_type = VMAT_LOAD_STORE_LANES;
2297 overrun_p = would_overrun_p;
2298 }
2299
2300 /* If that fails, try using permuting loads. */
2301 if (*memory_access_type == VMAT_ELEMENTWISE
2302 && (vls_type == VLS_LOAD
2303 ? vect_grouped_load_supported (vectype, single_element_p,
2304 group_size)
2305 : vect_grouped_store_supported (vectype, group_size)))
2306 {
2307 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2308 overrun_p = would_overrun_p;
2309 }
2310 }
429ef523
RS
2311
2312 /* As a last resort, try using a gather load or scatter store.
2313
2314 ??? Although the code can handle all group sizes correctly,
2315 it probably isn't a win to use separate strided accesses based
2316 on nearby locations. Or, even if it's a win over scalar code,
2317 it might not be a win over vectorizing at a lower VF, if that
2318 allows us to use contiguous accesses. */
2319 if (*memory_access_type == VMAT_ELEMENTWISE
2320 && single_element_p
2321 && loop_vinfo
2322 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2323 masked_p, gs_info))
2324 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2325 }
2326
2327 if (vls_type != VLS_LOAD && first_stmt == stmt)
2328 {
2329 /* STMT is the leader of the group. Check the operands of all the
2330 stmts of the group. */
2c53b149 2331 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2de001ee
RS
2332 while (next_stmt)
2333 {
7e11fc7f 2334 tree op = vect_get_store_rhs (next_stmt);
2de001ee 2335 enum vect_def_type dt;
894dd753 2336 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2337 {
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2340 "use not simple.\n");
2341 return false;
2342 }
2c53b149 2343 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2de001ee
RS
2344 }
2345 }
2346
2347 if (overrun_p)
2348 {
2349 gcc_assert (can_overrun_p);
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "Data access with gaps requires scalar "
2353 "epilogue loop\n");
2354 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2355 }
2356
2357 return true;
2358}
2359
62da9e14
RS
2360/* A subroutine of get_load_store_type, with a subset of the same
2361 arguments. Handle the case where STMT is a load or store that
2362 accesses consecutive elements with a negative step. */
2363
2364static vect_memory_access_type
2365get_negative_load_store_type (gimple *stmt, tree vectype,
2366 vec_load_store_type vls_type,
2367 unsigned int ncopies)
2368{
2369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2370 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2371 dr_alignment_support alignment_support_scheme;
2372
2373 if (ncopies > 1)
2374 {
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "multiple types with negative step.\n");
2378 return VMAT_ELEMENTWISE;
2379 }
2380
2381 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2382 if (alignment_support_scheme != dr_aligned
2383 && alignment_support_scheme != dr_unaligned_supported)
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "negative step but alignment required.\n");
2388 return VMAT_ELEMENTWISE;
2389 }
2390
2391 if (vls_type == VLS_STORE_INVARIANT)
2392 {
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_NOTE, vect_location,
2395 "negative step with invariant source;"
2396 " no permute needed.\n");
2397 return VMAT_CONTIGUOUS_DOWN;
2398 }
2399
2400 if (!perm_mask_for_reverse (vectype))
2401 {
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "negative step and reversing not supported.\n");
2405 return VMAT_ELEMENTWISE;
2406 }
2407
2408 return VMAT_CONTIGUOUS_REVERSE;
2409}
2410
2de001ee
RS
2411/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2412 if there is a memory access type that the vectorized form can use,
2413 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2414 or scatters, fill in GS_INFO accordingly.
2415
2416 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2417 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2418 VECTYPE is the vector type that the vectorized statements will use.
2419 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2420
2421static bool
7e11fc7f 2422get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2423 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2424 vect_memory_access_type *memory_access_type,
2425 gather_scatter_info *gs_info)
2426{
2427 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2428 vec_info *vinfo = stmt_info->vinfo;
2429 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2430 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2431 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2432 {
2433 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2434 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2435 gcc_unreachable ();
894dd753 2436 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2437 &gs_info->offset_dt,
2438 &gs_info->offset_vectype))
2439 {
2440 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "%s index use not simple.\n",
2443 vls_type == VLS_LOAD ? "gather" : "scatter");
2444 return false;
2445 }
2446 }
2447 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2448 {
7e11fc7f 2449 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2450 memory_access_type, gs_info))
2de001ee
RS
2451 return false;
2452 }
2453 else if (STMT_VINFO_STRIDED_P (stmt_info))
2454 {
2455 gcc_assert (!slp);
ab2fc782 2456 if (loop_vinfo
429ef523
RS
2457 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2458 masked_p, gs_info))
ab2fc782
RS
2459 *memory_access_type = VMAT_GATHER_SCATTER;
2460 else
2461 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2462 }
2463 else
62da9e14
RS
2464 {
2465 int cmp = compare_step_with_zero (stmt);
2466 if (cmp < 0)
2467 *memory_access_type = get_negative_load_store_type
2468 (stmt, vectype, vls_type, ncopies);
2469 else if (cmp == 0)
2470 {
2471 gcc_assert (vls_type == VLS_LOAD);
2472 *memory_access_type = VMAT_INVARIANT;
2473 }
2474 else
2475 *memory_access_type = VMAT_CONTIGUOUS;
2476 }
2de001ee 2477
4d694b27
RS
2478 if ((*memory_access_type == VMAT_ELEMENTWISE
2479 || *memory_access_type == VMAT_STRIDED_SLP)
2480 && !nunits.is_constant ())
2481 {
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "Not using elementwise accesses due to variable "
2485 "vectorization factor.\n");
2486 return false;
2487 }
2488
2de001ee
RS
2489 /* FIXME: At the moment the cost model seems to underestimate the
2490 cost of using elementwise accesses. This check preserves the
2491 traditional behavior until that can be fixed. */
2492 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2493 && !STMT_VINFO_STRIDED_P (stmt_info)
2c53b149
RB
2494 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2495 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2496 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2497 {
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "not falling back to elementwise accesses\n");
2501 return false;
2502 }
2503 return true;
2504}
2505
aaeefd88 2506/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2507 conditional load or store STMT. When returning true, store the type
2508 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2509 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2510
2511static bool
929b4411
RS
2512vect_check_load_store_mask (gimple *stmt, tree mask,
2513 vect_def_type *mask_dt_out,
2514 tree *mask_vectype_out)
aaeefd88
RS
2515{
2516 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2517 {
2518 if (dump_enabled_p ())
2519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2520 "mask argument is not a boolean.\n");
2521 return false;
2522 }
2523
2524 if (TREE_CODE (mask) != SSA_NAME)
2525 {
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask argument is not an SSA name.\n");
2529 return false;
2530 }
2531
2532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2533 enum vect_def_type mask_dt;
aaeefd88 2534 tree mask_vectype;
894dd753 2535 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2536 {
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2539 "mask use not simple.\n");
2540 return false;
2541 }
2542
2543 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2544 if (!mask_vectype)
2545 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2546
2547 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2548 {
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "could not find an appropriate vector mask type.\n");
2552 return false;
2553 }
2554
2555 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2556 TYPE_VECTOR_SUBPARTS (vectype)))
2557 {
2558 if (dump_enabled_p ())
2559 {
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "vector mask type ");
2562 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2563 dump_printf (MSG_MISSED_OPTIMIZATION,
2564 " does not match vector data type ");
2565 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2566 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2567 }
2568 return false;
2569 }
2570
929b4411 2571 *mask_dt_out = mask_dt;
aaeefd88
RS
2572 *mask_vectype_out = mask_vectype;
2573 return true;
2574}
2575
3133c3b6
RS
2576/* Return true if stored value RHS is suitable for vectorizing store
2577 statement STMT. When returning true, store the type of the
929b4411
RS
2578 definition in *RHS_DT_OUT, the type of the vectorized store value in
2579 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2580
2581static bool
929b4411
RS
2582vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2583 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2584{
2585 /* In the case this is a store from a constant, make sure
2586 native_encode_expr can handle it. */
2587 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2588 {
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "cannot encode constant as a byte sequence.\n");
2592 return false;
2593 }
2594
2595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2596 enum vect_def_type rhs_dt;
3133c3b6 2597 tree rhs_vectype;
894dd753 2598 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
3133c3b6
RS
2599 {
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "use not simple.\n");
2603 return false;
2604 }
2605
2606 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2607 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2608 {
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "incompatible vector types.\n");
2612 return false;
2613 }
2614
929b4411 2615 *rhs_dt_out = rhs_dt;
3133c3b6 2616 *rhs_vectype_out = rhs_vectype;
929b4411 2617 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2618 *vls_type_out = VLS_STORE_INVARIANT;
2619 else
2620 *vls_type_out = VLS_STORE;
2621 return true;
2622}
2623
bc9587eb
RS
2624/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2625 Note that we support masks with floating-point type, in which case the
2626 floats are interpreted as a bitmask. */
2627
2628static tree
2629vect_build_all_ones_mask (gimple *stmt, tree masktype)
2630{
2631 if (TREE_CODE (masktype) == INTEGER_TYPE)
2632 return build_int_cst (masktype, -1);
2633 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2634 {
2635 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2636 mask = build_vector_from_val (masktype, mask);
2637 return vect_init_vector (stmt, mask, masktype, NULL);
2638 }
2639 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2640 {
2641 REAL_VALUE_TYPE r;
2642 long tmp[6];
2643 for (int j = 0; j < 6; ++j)
2644 tmp[j] = -1;
2645 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2646 tree mask = build_real (TREE_TYPE (masktype), r);
2647 mask = build_vector_from_val (masktype, mask);
2648 return vect_init_vector (stmt, mask, masktype, NULL);
2649 }
2650 gcc_unreachable ();
2651}
2652
2653/* Build an all-zero merge value of type VECTYPE while vectorizing
2654 STMT as a gather load. */
2655
2656static tree
2657vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2658{
2659 tree merge;
2660 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2661 merge = build_int_cst (TREE_TYPE (vectype), 0);
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2663 {
2664 REAL_VALUE_TYPE r;
2665 long tmp[6];
2666 for (int j = 0; j < 6; ++j)
2667 tmp[j] = 0;
2668 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2669 merge = build_real (TREE_TYPE (vectype), r);
2670 }
2671 else
2672 gcc_unreachable ();
2673 merge = build_vector_from_val (vectype, merge);
2674 return vect_init_vector (stmt, merge, vectype, NULL);
2675}
2676
c48d2d35
RS
2677/* Build a gather load call while vectorizing STMT. Insert new instructions
2678 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2679 operation. If the load is conditional, MASK is the unvectorized
929b4411 2680 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2681
2682static void
2683vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2684 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2685 tree mask, vect_def_type mask_dt)
c48d2d35
RS
2686{
2687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2688 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2689 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2691 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2692 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2693 edge pe = loop_preheader_edge (loop);
2694 enum { NARROW, NONE, WIDEN } modifier;
2695 poly_uint64 gather_off_nunits
2696 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2697
2698 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2699 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2700 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2703 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2704 tree scaletype = TREE_VALUE (arglist);
2705 gcc_checking_assert (types_compatible_p (srctype, rettype)
2706 && (!mask || types_compatible_p (srctype, masktype)));
2707
2708 tree perm_mask = NULL_TREE;
2709 tree mask_perm_mask = NULL_TREE;
2710 if (known_eq (nunits, gather_off_nunits))
2711 modifier = NONE;
2712 else if (known_eq (nunits * 2, gather_off_nunits))
2713 {
2714 modifier = WIDEN;
2715
2716 /* Currently widening gathers and scatters are only supported for
2717 fixed-length vectors. */
2718 int count = gather_off_nunits.to_constant ();
2719 vec_perm_builder sel (count, count, 1);
2720 for (int i = 0; i < count; ++i)
2721 sel.quick_push (i | (count / 2));
2722
2723 vec_perm_indices indices (sel, 1, count);
2724 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2725 indices);
2726 }
2727 else if (known_eq (nunits, gather_off_nunits * 2))
2728 {
2729 modifier = NARROW;
2730
2731 /* Currently narrowing gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count = nunits.to_constant ();
2734 vec_perm_builder sel (count, count, 1);
2735 sel.quick_grow (count);
2736 for (int i = 0; i < count; ++i)
2737 sel[i] = i < count / 2 ? i : i + count / 2;
2738 vec_perm_indices indices (sel, 2, count);
2739 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2740
2741 ncopies *= 2;
2742
2743 if (mask)
2744 {
2745 for (int i = 0; i < count; ++i)
2746 sel[i] = i | (count / 2);
2747 indices.new_vector (sel, 2, count);
2748 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2749 }
2750 }
2751 else
2752 gcc_unreachable ();
2753
2754 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2755 vectype);
2756
2757 tree ptr = fold_convert (ptrtype, gs_info->base);
2758 if (!is_gimple_min_invariant (ptr))
2759 {
2760 gimple_seq seq;
2761 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2762 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2763 gcc_assert (!new_bb);
2764 }
2765
2766 tree scale = build_int_cst (scaletype, gs_info->scale);
2767
2768 tree vec_oprnd0 = NULL_TREE;
2769 tree vec_mask = NULL_TREE;
2770 tree src_op = NULL_TREE;
2771 tree mask_op = NULL_TREE;
2772 tree prev_res = NULL_TREE;
2773 stmt_vec_info prev_stmt_info = NULL;
2774
2775 if (!mask)
2776 {
2777 src_op = vect_build_zero_merge_argument (stmt, rettype);
2778 mask_op = vect_build_all_ones_mask (stmt, masktype);
2779 }
2780
2781 for (int j = 0; j < ncopies; ++j)
2782 {
2783 tree op, var;
2784 gimple *new_stmt;
2785 if (modifier == WIDEN && (j & 1))
2786 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2787 perm_mask, stmt, gsi);
2788 else if (j == 0)
2789 op = vec_oprnd0
2790 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2791 else
2792 op = vec_oprnd0
2793 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2794
2795 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2796 {
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2798 TYPE_VECTOR_SUBPARTS (idxtype)));
2799 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2800 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2801 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2803 op = var;
2804 }
2805
2806 if (mask)
2807 {
2808 if (mask_perm_mask && (j & 1))
2809 mask_op = permute_vec_elements (mask_op, mask_op,
2810 mask_perm_mask, stmt, gsi);
2811 else
2812 {
2813 if (j == 0)
2814 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2815 else
929b4411 2816 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2817
2818 mask_op = vec_mask;
2819 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2820 {
2821 gcc_assert
2822 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2823 TYPE_VECTOR_SUBPARTS (masktype)));
2824 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2825 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2826 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2827 mask_op);
2828 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2829 mask_op = var;
2830 }
2831 }
2832 src_op = mask_op;
2833 }
2834
2835 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2836 mask_op, scale);
2837
2838 if (!useless_type_conversion_p (vectype, rettype))
2839 {
2840 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2841 TYPE_VECTOR_SUBPARTS (rettype)));
2842 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2843 gimple_call_set_lhs (new_stmt, op);
2844 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2845 var = make_ssa_name (vec_dest);
2846 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2847 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2848 }
2849 else
2850 {
2851 var = make_ssa_name (vec_dest, new_stmt);
2852 gimple_call_set_lhs (new_stmt, var);
2853 }
2854
2855 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2856
2857 if (modifier == NARROW)
2858 {
2859 if ((j & 1) == 0)
2860 {
2861 prev_res = var;
2862 continue;
2863 }
2864 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2865 new_stmt = SSA_NAME_DEF_STMT (var);
2866 }
2867
2868 if (prev_stmt_info == NULL)
2869 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2870 else
2871 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2872 prev_stmt_info = vinfo_for_stmt (new_stmt);
2873 }
2874}
2875
bfaa08b7
RS
2876/* Prepare the base and offset in GS_INFO for vectorization.
2877 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2878 to the vectorized offset argument for the first copy of STMT. STMT
2879 is the statement described by GS_INFO and LOOP is the containing loop. */
2880
2881static void
2882vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2883 gather_scatter_info *gs_info,
2884 tree *dataref_ptr, tree *vec_offset)
2885{
2886 gimple_seq stmts = NULL;
2887 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2888 if (stmts != NULL)
2889 {
2890 basic_block new_bb;
2891 edge pe = loop_preheader_edge (loop);
2892 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2893 gcc_assert (!new_bb);
2894 }
2895 tree offset_type = TREE_TYPE (gs_info->offset);
2896 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2897 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2898 offset_vectype);
2899}
2900
ab2fc782
RS
2901/* Prepare to implement a grouped or strided load or store using
2902 the gather load or scatter store operation described by GS_INFO.
2903 STMT is the load or store statement.
2904
2905 Set *DATAREF_BUMP to the amount that should be added to the base
2906 address after each copy of the vectorized statement. Set *VEC_OFFSET
2907 to an invariant offset vector in which element I has the value
2908 I * DR_STEP / SCALE. */
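/* Example with assumed values (not from the original source): for a
   four-element vector with DR_STEP == 32 bytes and SCALE == 4, the code
   below sets *DATAREF_BUMP to 32 * 4 == 128 bytes and, assuming the
   offset vector also has four elements, *VEC_OFFSET to { 0, 8, 16, 24 },
   i.e. element I holds I * DR_STEP / SCALE.  */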
2909
2910static void
2911vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2912 gather_scatter_info *gs_info,
2913 tree *dataref_bump, tree *vec_offset)
2914{
2915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2917 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2918 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2919 gimple_seq stmts;
2920
2921 tree bump = size_binop (MULT_EXPR,
2922 fold_convert (sizetype, DR_STEP (dr)),
2923 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2924 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2925 if (stmts)
2926 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2927
2928 /* The offset given in GS_INFO can have pointer type, so use the element
2929 type of the vector instead. */
2930 tree offset_type = TREE_TYPE (gs_info->offset);
2931 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2932 offset_type = TREE_TYPE (offset_vectype);
2933
2934 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2935 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2936 ssize_int (gs_info->scale));
2937 step = fold_convert (offset_type, step);
2938 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2939
2940 /* Create {0, X, X*2, X*3, ...}. */
2941 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2942 build_zero_cst (offset_type), step);
2943 if (stmts)
2944 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2945}
2946
2947/* Return the amount that should be added to a vector pointer to move
2948 to the next or previous copy of AGGR_TYPE. DR is the data reference
2949 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2950 vectorization. */
2951
2952static tree
2953vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2954 vect_memory_access_type memory_access_type)
2955{
2956 if (memory_access_type == VMAT_INVARIANT)
2957 return size_zero_node;
2958
2959 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2960 tree step = vect_dr_behavior (dr)->step;
2961 if (tree_int_cst_sgn (step) == -1)
2962 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2963 return iv_step;
2964}
2965
37b14185
RB
2966/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
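/* Sketch of the idea (illustrative): vectorizing __builtin_bswap32 over a
   vector of four 32-bit integers views the operand as sixteen chars,
   applies the byte permutation { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 } built below, and views the result back as the
   original vector type.  */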
2967
2968static bool
2969vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2970 gimple **vec_stmt, slp_tree slp_node,
68435eb2
RB
2971 tree vectype_in, enum vect_def_type *dt,
2972 stmt_vector_for_cost *cost_vec)
37b14185
RB
2973{
2974 tree op, vectype;
2975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2977 unsigned ncopies;
2978 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2979
2980 op = gimple_call_arg (stmt, 0);
2981 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2982
2983 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2984 return false;
37b14185
RB
2985
2986 /* Multiple types in SLP are handled by creating the appropriate number of
2987 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2988 case of SLP. */
2989 if (slp_node)
2990 ncopies = 1;
2991 else
e8f142e2 2992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2993
2994 gcc_assert (ncopies >= 1);
2995
2996 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2997 if (! char_vectype)
2998 return false;
2999
928686b1
RS
3000 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3001 return false;
3002
794e3180 3003 unsigned word_bytes = num_bytes / nunits;
908a1a16 3004
d980067b
RS
3005 /* The encoding uses one stepped pattern for each byte in the word. */
3006 vec_perm_builder elts (num_bytes, word_bytes, 3);
3007 for (unsigned i = 0; i < 3; ++i)
37b14185 3008 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 3009 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 3010
e3342de4
RS
3011 vec_perm_indices indices (elts, 1, num_bytes);
3012 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
3013 return false;
3014
3015 if (! vec_stmt)
3016 {
3017 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3018 DUMP_VECT_SCOPE ("vectorizable_bswap");
78604de0 3019 if (! slp_node)
37b14185 3020 {
68435eb2
RB
3021 record_stmt_cost (cost_vec,
3022 1, vector_stmt, stmt_info, 0, vect_prologue);
3023 record_stmt_cost (cost_vec,
3024 ncopies, vec_perm, stmt_info, 0, vect_body);
37b14185
RB
3025 }
3026 return true;
3027 }
3028
736d0f28 3029 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
3030
3031 /* Transform. */
3032 vec<tree> vec_oprnds = vNULL;
3033 gimple *new_stmt = NULL;
3034 stmt_vec_info prev_stmt_info = NULL;
3035 for (unsigned j = 0; j < ncopies; j++)
3036 {
3037 /* Handle uses. */
3038 if (j == 0)
306b0c92 3039 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
3040 else
3041 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3042
3043 /* Arguments are ready. Create the new vector stmt. */
3044 unsigned i;
3045 tree vop;
3046 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3047 {
3048 tree tem = make_ssa_name (char_vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 char_vectype, vop));
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3052 tree tem2 = make_ssa_name (char_vectype);
3053 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3054 tem, tem, bswap_vconst);
3055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3056 tem = make_ssa_name (vectype);
3057 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3058 vectype, tem2));
3059 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3060 if (slp_node)
3061 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3062 }
3063
3064 if (slp_node)
3065 continue;
3066
3067 if (j == 0)
3068 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3069 else
3070 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3071
3072 prev_stmt_info = vinfo_for_stmt (new_stmt);
3073 }
3074
3075 vec_oprnds.release ();
3076 return true;
3077}
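/* Worked example (illustrative only, not part of the original source):
   vectorizing

     for (i = 0; i < n; i++)
       a[i] = __builtin_bswap32 (b[i]);

   with 16-byte vectors gives NUNITS = 4, NUM_BYTES = 16, WORD_BYTES = 4,
   and the stepped permutation built above expands to

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   i.e. the bytes of each 32-bit word are reversed.  The transform then
   emits a VIEW_CONVERT_EXPR to the char vector type, one VEC_PERM_EXPR
   using this selector, and a VIEW_CONVERT_EXPR back to the original
   vector type.  */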
3078
b1b6836e
RS
3079/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3080 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3081 in a single step. On success, store the binary pack code in
3082 *CONVERT_CODE. */
3083
3084static bool
3085simple_integer_narrowing (tree vectype_out, tree vectype_in,
3086 tree_code *convert_code)
3087{
3088 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3089 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3090 return false;
3091
3092 tree_code code;
3093 int multi_step_cvt = 0;
3094 auto_vec <tree, 8> interm_types;
3095 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3096 &code, &multi_step_cvt,
3097 &interm_types)
3098 || multi_step_cvt)
3099 return false;
3100
3101 *convert_code = code;
3102 return true;
3103}
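/* Illustrative example (assumption, not from the original source): with
   VECTYPE_IN = vector(4) int and VECTYPE_OUT = vector(8) short,
   supportable_narrowing_operation normally reports VEC_PACK_TRUNC_EXPR
   with no intermediate types, so this returns true and sets *CONVERT_CODE
   to VEC_PACK_TRUNC_EXPR.  vectorizable_call uses this to handle calls
   whose result elements are half the width of their argument elements:
   two internal-function calls are emitted on the wide vector type and
   their results are combined with a single VEC_PACK_TRUNC_EXPR.  */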
5ce9450f 3104
ebfd146a
IR
3105/* Function vectorizable_call.
3106
538dd0b7 3107 Check if GS performs a function call that can be vectorized.
b8698a0f 3108 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3109 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3110 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3111
3112static bool
355fe088 3113vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 3114 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 3115{
538dd0b7 3116 gcall *stmt;
ebfd146a
IR
3117 tree vec_dest;
3118 tree scalar_dest;
0267732b 3119 tree op;
ebfd146a 3120 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3121 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3122 tree vectype_out, vectype_in;
c7bda0f4
RS
3123 poly_uint64 nunits_in;
3124 poly_uint64 nunits_out;
ebfd146a 3125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3126 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3127 vec_info *vinfo = stmt_info->vinfo;
81c40241 3128 tree fndecl, new_temp, rhs_type;
2c58d42c
RS
3129 enum vect_def_type dt[4]
3130 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3131 vect_unknown_def_type };
3132 int ndts = ARRAY_SIZE (dt);
355fe088 3133 gimple *new_stmt = NULL;
ebfd146a 3134 int ncopies, j;
2c58d42c
RS
3135 auto_vec<tree, 8> vargs;
3136 auto_vec<tree, 8> orig_vargs;
ebfd146a
IR
3137 enum { NARROW, NONE, WIDEN } modifier;
3138 size_t i, nargs;
9d5e7640 3139 tree lhs;
ebfd146a 3140
190c2236 3141 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3142 return false;
3143
66c16fd9
RB
3144 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3145 && ! vec_stmt)
ebfd146a
IR
3146 return false;
3147
538dd0b7
DM
3148 /* Is GS a vectorizable call? */
3149 stmt = dyn_cast <gcall *> (gs);
3150 if (!stmt)
ebfd146a
IR
3151 return false;
3152
5ce9450f 3153 if (gimple_call_internal_p (stmt)
bfaa08b7 3154 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3155 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3156 /* Handled by vectorizable_load and vectorizable_store. */
3157 return false;
5ce9450f 3158
0136f8f0
AH
3159 if (gimple_call_lhs (stmt) == NULL_TREE
3160 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3161 return false;
3162
0136f8f0 3163 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3164
b690cc0f
RG
3165 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3166
ebfd146a
IR
3167 /* Process function arguments. */
3168 rhs_type = NULL_TREE;
b690cc0f 3169 vectype_in = NULL_TREE;
ebfd146a
IR
3170 nargs = gimple_call_num_args (stmt);
3171
1b1562a5
MM
3172 /* Bail out if the function has more than four arguments; we do not have
3173 interesting builtin functions to vectorize with more than two arguments,
3174 except for fma and the mask-carrying conditional internal functions. Calls with no arguments are not handled either. */
2c58d42c 3175 if (nargs == 0 || nargs > 4)
ebfd146a
IR
3176 return false;
3177
74bf76ed 3178 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2c58d42c
RS
3179 combined_fn cfn = gimple_call_combined_fn (stmt);
3180 if (cfn == CFN_GOMP_SIMD_LANE)
74bf76ed
JJ
3181 {
3182 nargs = 0;
3183 rhs_type = unsigned_type_node;
3184 }
3185
2c58d42c
RS
3186 int mask_opno = -1;
3187 if (internal_fn_p (cfn))
3188 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3189
ebfd146a
IR
3190 for (i = 0; i < nargs; i++)
3191 {
b690cc0f
RG
3192 tree opvectype;
3193
ebfd146a 3194 op = gimple_call_arg (stmt, i);
2c58d42c
RS
3195 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3196 {
3197 if (dump_enabled_p ())
3198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3199 "use not simple.\n");
3200 return false;
3201 }
3202
3203 /* Skip the mask argument to an internal function. This operand
3204 has been converted via a pattern if necessary. */
3205 if ((int) i == mask_opno)
3206 continue;
ebfd146a
IR
3207
3208 /* We can only handle calls with arguments of the same type. */
3209 if (rhs_type
8533c9d8 3210 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3211 {
73fbfcad 3212 if (dump_enabled_p ())
78c60e3d 3213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3214 "argument types differ.\n");
ebfd146a
IR
3215 return false;
3216 }
b690cc0f
RG
3217 if (!rhs_type)
3218 rhs_type = TREE_TYPE (op);
ebfd146a 3219
b690cc0f
RG
3220 if (!vectype_in)
3221 vectype_in = opvectype;
3222 else if (opvectype
3223 && opvectype != vectype_in)
3224 {
73fbfcad 3225 if (dump_enabled_p ())
78c60e3d 3226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3227 "argument vector types differ.\n");
b690cc0f
RG
3228 return false;
3229 }
3230 }
3231 /* If all arguments are external or constant defs use a vector type with
3232 the same size as the output vector type. */
ebfd146a 3233 if (!vectype_in)
b690cc0f 3234 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3235 if (vec_stmt)
3236 gcc_assert (vectype_in);
3237 if (!vectype_in)
3238 {
73fbfcad 3239 if (dump_enabled_p ())
7d8930a0 3240 {
78c60e3d
SS
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "no vectype for scalar type ");
3243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3244 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3245 }
3246
3247 return false;
3248 }
ebfd146a
IR
3249
3250 /* FORNOW */
b690cc0f
RG
3251 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3252 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3253 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3254 modifier = NARROW;
c7bda0f4 3255 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3256 modifier = NONE;
c7bda0f4 3257 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3258 modifier = WIDEN;
3259 else
3260 return false;
3261
70439f0d
RS
3262 /* We only handle functions that do not read or clobber memory. */
3263 if (gimple_vuse (stmt))
3264 {
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3267 "function reads from or writes to memory.\n");
3268 return false;
3269 }
3270
ebfd146a
IR
3271 /* For now, we only vectorize functions if a target specific builtin
3272 is available. TODO -- in some cases, it might be profitable to
3273 insert the calls for pieces of the vector, in order to be able
3274 to vectorize other operations in the loop. */
70439f0d
RS
3275 fndecl = NULL_TREE;
3276 internal_fn ifn = IFN_LAST;
70439f0d
RS
3277 tree callee = gimple_call_fndecl (stmt);
3278
3279 /* First try using an internal function. */
b1b6836e
RS
3280 tree_code convert_code = ERROR_MARK;
3281 if (cfn != CFN_LAST
3282 && (modifier == NONE
3283 || (modifier == NARROW
3284 && simple_integer_narrowing (vectype_out, vectype_in,
3285 &convert_code))))
70439f0d
RS
3286 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3287 vectype_in);
3288
3289 /* If that fails, try asking for a target-specific built-in function. */
3290 if (ifn == IFN_LAST)
3291 {
3292 if (cfn != CFN_LAST)
3293 fndecl = targetm.vectorize.builtin_vectorized_function
3294 (cfn, vectype_out, vectype_in);
7672aa9b 3295 else if (callee)
70439f0d
RS
3296 fndecl = targetm.vectorize.builtin_md_vectorized_function
3297 (callee, vectype_out, vectype_in);
3298 }
3299
3300 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3301 {
70439f0d 3302 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3303 && !slp_node
3304 && loop_vinfo
3305 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3306 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3307 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3308 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3309 {
3310 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3311 { 0, 1, 2, ... vf - 1 } vector. */
3312 gcc_assert (nargs == 0);
3313 }
37b14185
RB
3314 else if (modifier == NONE
3315 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3316 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3317 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3318 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3319 vectype_in, dt, cost_vec);
74bf76ed
JJ
3320 else
3321 {
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3324 "function is not vectorizable.\n");
74bf76ed
JJ
3325 return false;
3326 }
ebfd146a
IR
3327 }
3328
fce57248 3329 if (slp_node)
190c2236 3330 ncopies = 1;
b1b6836e 3331 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3332 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3333 else
e8f142e2 3334 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3335
3336 /* Sanity check: make sure that at least one copy of the vectorized stmt
3337 needs to be generated. */
3338 gcc_assert (ncopies >= 1);
3339
ed623edb 3340 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
ebfd146a
IR
3341 if (!vec_stmt) /* transformation not required. */
3342 {
3343 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3344 DUMP_VECT_SCOPE ("vectorizable_call");
68435eb2
RB
3345 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3346 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3347 record_stmt_cost (cost_vec, ncopies / 2,
3348 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3349
2c58d42c
RS
3350 if (loop_vinfo && mask_opno >= 0)
3351 {
3352 unsigned int nvectors = (slp_node
3353 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3354 : ncopies);
3355 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3356 }
ebfd146a
IR
3357 return true;
3358 }
3359
67b8dbac 3360 /* Transform. */
ebfd146a 3361
73fbfcad 3362 if (dump_enabled_p ())
e645e942 3363 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3364
3365 /* Handle def. */
3366 scalar_dest = gimple_call_lhs (stmt);
3367 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3368
2c58d42c
RS
3369 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3370
ebfd146a 3371 prev_stmt_info = NULL;
b1b6836e 3372 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3373 {
b1b6836e 3374 tree prev_res = NULL_TREE;
2c58d42c
RS
3375 vargs.safe_grow (nargs);
3376 orig_vargs.safe_grow (nargs);
ebfd146a
IR
3377 for (j = 0; j < ncopies; ++j)
3378 {
3379 /* Build argument list for the vectorized call. */
190c2236
JJ
3380 if (slp_node)
3381 {
ef062b13 3382 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3383 vec<tree> vec_oprnds0;
190c2236
JJ
3384
3385 for (i = 0; i < nargs; i++)
2c58d42c 3386 vargs[i] = gimple_call_arg (stmt, i);
306b0c92 3387 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3388 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3389
3390 /* Arguments are ready. Create the new vector stmt. */
9771b263 3391 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3392 {
3393 size_t k;
3394 for (k = 0; k < nargs; k++)
3395 {
37b5ec8f 3396 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3397 vargs[k] = vec_oprndsk[i];
190c2236 3398 }
b1b6836e
RS
3399 if (modifier == NARROW)
3400 {
2c58d42c
RS
3401 /* We don't define any narrowing conditional functions
3402 at present. */
3403 gcc_assert (mask_opno < 0);
b1b6836e 3404 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3405 gcall *call
3406 = gimple_build_call_internal_vec (ifn, vargs);
3407 gimple_call_set_lhs (call, half_res);
3408 gimple_call_set_nothrow (call, true);
3409 new_stmt = call;
b1b6836e
RS
3410 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3411 if ((i & 1) == 0)
3412 {
3413 prev_res = half_res;
3414 continue;
3415 }
3416 new_temp = make_ssa_name (vec_dest);
3417 new_stmt = gimple_build_assign (new_temp, convert_code,
3418 prev_res, half_res);
3419 }
70439f0d 3420 else
b1b6836e 3421 {
2c58d42c
RS
3422 if (mask_opno >= 0 && masked_loop_p)
3423 {
3424 unsigned int vec_num = vec_oprnds0.length ();
3425 /* Always true for SLP. */
3426 gcc_assert (ncopies == 1);
3427 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3428 vectype_out, i);
3429 vargs[mask_opno] = prepare_load_store_mask
3430 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3431 }
3432
a844293d 3433 gcall *call;
b1b6836e 3434 if (ifn != IFN_LAST)
a844293d 3435 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3436 else
a844293d
RS
3437 call = gimple_build_call_vec (fndecl, vargs);
3438 new_temp = make_ssa_name (vec_dest, call);
3439 gimple_call_set_lhs (call, new_temp);
3440 gimple_call_set_nothrow (call, true);
3441 new_stmt = call;
b1b6836e 3442 }
190c2236 3443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3444 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3445 }
3446
3447 for (i = 0; i < nargs; i++)
3448 {
37b5ec8f 3449 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3450 vec_oprndsi.release ();
190c2236 3451 }
190c2236
JJ
3452 continue;
3453 }
3454
ebfd146a
IR
3455 for (i = 0; i < nargs; i++)
3456 {
3457 op = gimple_call_arg (stmt, i);
3458 if (j == 0)
3459 vec_oprnd0
81c40241 3460 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3461 else
2c58d42c
RS
3462 vec_oprnd0
3463 = vect_get_vec_def_for_stmt_copy (dt[i], orig_vargs[i]);
3464
3465 orig_vargs[i] = vargs[i] = vec_oprnd0;
3466 }
ebfd146a 3467
2c58d42c
RS
3468 if (mask_opno >= 0 && masked_loop_p)
3469 {
3470 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3471 vectype_out, j);
3472 vargs[mask_opno]
3473 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3474 vargs[mask_opno], gsi);
ebfd146a
IR
3475 }
3476
2c58d42c 3477 if (cfn == CFN_GOMP_SIMD_LANE)
74bf76ed 3478 {
c7bda0f4 3479 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3480 tree new_var
0e22bb5a 3481 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3482 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3483 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3484 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3485 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3486 }
b1b6836e
RS
3487 else if (modifier == NARROW)
3488 {
2c58d42c
RS
3489 /* We don't define any narrowing conditional functions at
3490 present. */
3491 gcc_assert (mask_opno < 0);
b1b6836e 3492 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3493 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3494 gimple_call_set_lhs (call, half_res);
3495 gimple_call_set_nothrow (call, true);
3496 new_stmt = call;
b1b6836e
RS
3497 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3498 if ((j & 1) == 0)
3499 {
3500 prev_res = half_res;
3501 continue;
3502 }
3503 new_temp = make_ssa_name (vec_dest);
3504 new_stmt = gimple_build_assign (new_temp, convert_code,
3505 prev_res, half_res);
3506 }
74bf76ed
JJ
3507 else
3508 {
a844293d 3509 gcall *call;
70439f0d 3510 if (ifn != IFN_LAST)
a844293d 3511 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3512 else
a844293d 3513 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3514 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3515 gimple_call_set_lhs (call, new_temp);
3516 gimple_call_set_nothrow (call, true);
3517 new_stmt = call;
74bf76ed 3518 }
ebfd146a
IR
3519 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3520
b1b6836e 3521 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3522 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3523 else
3524 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3525
3526 prev_stmt_info = vinfo_for_stmt (new_stmt);
3527 }
b1b6836e
RS
3528 }
3529 else if (modifier == NARROW)
3530 {
2c58d42c
RS
3531 /* We don't define any narrowing conditional functions at present. */
3532 gcc_assert (mask_opno < 0);
ebfd146a
IR
3533 for (j = 0; j < ncopies; ++j)
3534 {
3535 /* Build argument list for the vectorized call. */
3536 if (j == 0)
9771b263 3537 vargs.create (nargs * 2);
ebfd146a 3538 else
9771b263 3539 vargs.truncate (0);
ebfd146a 3540
190c2236
JJ
3541 if (slp_node)
3542 {
ef062b13 3543 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3544 vec<tree> vec_oprnds0;
190c2236
JJ
3545
3546 for (i = 0; i < nargs; i++)
9771b263 3547 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3548 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3549 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3550
3551 /* Arguments are ready. Create the new vector stmt. */
9771b263 3552 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3553 {
3554 size_t k;
9771b263 3555 vargs.truncate (0);
190c2236
JJ
3556 for (k = 0; k < nargs; k++)
3557 {
37b5ec8f 3558 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3559 vargs.quick_push (vec_oprndsk[i]);
3560 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3561 }
a844293d 3562 gcall *call;
70439f0d 3563 if (ifn != IFN_LAST)
a844293d 3564 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3565 else
a844293d
RS
3566 call = gimple_build_call_vec (fndecl, vargs);
3567 new_temp = make_ssa_name (vec_dest, call);
3568 gimple_call_set_lhs (call, new_temp);
3569 gimple_call_set_nothrow (call, true);
3570 new_stmt = call;
190c2236 3571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3572 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3573 }
3574
3575 for (i = 0; i < nargs; i++)
3576 {
37b5ec8f 3577 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3578 vec_oprndsi.release ();
190c2236 3579 }
190c2236
JJ
3580 continue;
3581 }
3582
ebfd146a
IR
3583 for (i = 0; i < nargs; i++)
3584 {
3585 op = gimple_call_arg (stmt, i);
3586 if (j == 0)
3587 {
3588 vec_oprnd0
81c40241 3589 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3590 vec_oprnd1
63827fb8 3591 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3592 }
3593 else
3594 {
336ecb65 3595 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3596 vec_oprnd0
63827fb8 3597 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3598 vec_oprnd1
63827fb8 3599 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3600 }
3601
9771b263
DN
3602 vargs.quick_push (vec_oprnd0);
3603 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3604 }
3605
b1b6836e 3606 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3607 new_temp = make_ssa_name (vec_dest, new_stmt);
3608 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3609 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3610
3611 if (j == 0)
3612 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3613 else
3614 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3615
3616 prev_stmt_info = vinfo_for_stmt (new_stmt);
3617 }
3618
3619 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3620 }
b1b6836e
RS
3621 else
3622 /* No current target implements this case. */
3623 return false;
ebfd146a 3624
9771b263 3625 vargs.release ();
ebfd146a 3626
ebfd146a
IR
3627 /* The call in STMT might prevent it from being removed in DCE.
3628 However, we cannot remove it here, because of the way the SSA name
3629 it defines is mapped to the new definition. So just replace the
3630 rhs of the statement with something harmless. */
3631
dd34c087
JJ
3632 if (slp_node)
3633 return true;
3634
9d5e7640 3635 if (is_pattern_stmt_p (stmt_info))
ed7b8123
RS
3636 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3637 lhs = gimple_get_lhs (stmt_info->stmt);
3cc2fa2a 3638
0267732b 3639 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
ebfd146a 3640 set_vinfo_for_stmt (new_stmt, stmt_info);
ed7b8123 3641 set_vinfo_for_stmt (stmt_info->stmt, NULL);
ebfd146a
IR
3642 STMT_VINFO_STMT (stmt_info) = new_stmt;
3643 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3644
3645 return true;
3646}
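/* Usage sketch (illustrative only, not from the original source): a loop
   such as

     for (i = 0; i < n; i++)
       x[i] = sqrtf (y[i]);

   is handled by vectorizable_call above: the call's combined function is
   first tried as a vector internal function (e.g. IFN_SQRT) on the vector
   type, and only if that is unsupported are the target hooks
   builtin_vectorized_function / builtin_md_vectorized_function queried
   for a target builtin.  Calls that touch memory or whose arguments have
   differing types are rejected during analysis.  */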
3647
3648
0136f8f0
AH
3649struct simd_call_arg_info
3650{
3651 tree vectype;
3652 tree op;
0136f8f0 3653 HOST_WIDE_INT linear_step;
34e82342 3654 enum vect_def_type dt;
0136f8f0 3655 unsigned int align;
17b658af 3656 bool simd_lane_linear;
0136f8f0
AH
3657};
3658
17b658af
JJ
3659/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3660 is linear within simd lane (but not within whole loop), note it in
3661 *ARGINFO. */
3662
3663static void
3664vect_simd_lane_linear (tree op, struct loop *loop,
3665 struct simd_call_arg_info *arginfo)
3666{
355fe088 3667 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3668
3669 if (!is_gimple_assign (def_stmt)
3670 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3671 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3672 return;
3673
3674 tree base = gimple_assign_rhs1 (def_stmt);
3675 HOST_WIDE_INT linear_step = 0;
3676 tree v = gimple_assign_rhs2 (def_stmt);
3677 while (TREE_CODE (v) == SSA_NAME)
3678 {
3679 tree t;
3680 def_stmt = SSA_NAME_DEF_STMT (v);
3681 if (is_gimple_assign (def_stmt))
3682 switch (gimple_assign_rhs_code (def_stmt))
3683 {
3684 case PLUS_EXPR:
3685 t = gimple_assign_rhs2 (def_stmt);
3686 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3687 return;
3688 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3689 v = gimple_assign_rhs1 (def_stmt);
3690 continue;
3691 case MULT_EXPR:
3692 t = gimple_assign_rhs2 (def_stmt);
3693 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3694 return;
3695 linear_step = tree_to_shwi (t);
3696 v = gimple_assign_rhs1 (def_stmt);
3697 continue;
3698 CASE_CONVERT:
3699 t = gimple_assign_rhs1 (def_stmt);
3700 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3701 || (TYPE_PRECISION (TREE_TYPE (v))
3702 < TYPE_PRECISION (TREE_TYPE (t))))
3703 return;
3704 if (!linear_step)
3705 linear_step = 1;
3706 v = t;
3707 continue;
3708 default:
3709 return;
3710 }
8e4284d0 3711 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3712 && loop->simduid
3713 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3714 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3715 == loop->simduid))
3716 {
3717 if (!linear_step)
3718 linear_step = 1;
3719 arginfo->linear_step = linear_step;
3720 arginfo->op = base;
3721 arginfo->simd_lane_linear = true;
3722 return;
3723 }
3724 }
3725}
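/* Illustrative example (hypothetical GIMPLE, not from the original
   source): when a private addressable scalar in an OpenMP simd loop is
   expanded into a per-lane array, taking its address yields something
   like

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &D.priv + _3;

   The walk above follows the POINTER_PLUS_EXPR offset through the
   MULT_EXPR and the conversion back to the IFN_GOMP_SIMD_LANE call and
   records base &D.priv with linear_step 4: the pointer is linear within
   a simd lane even though it is not an induction over the whole loop.  */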
3726
cf1b2ba4
RS
3727/* Return the number of elements in vector type VECTYPE, which is associated
3728 with a SIMD clone. At present these vectors always have a constant
3729 length. */
3730
3731static unsigned HOST_WIDE_INT
3732simd_clone_subparts (tree vectype)
3733{
928686b1 3734 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3735}
3736
0136f8f0
AH
3737/* Function vectorizable_simd_clone_call.
3738
3739 Check if STMT performs a function call that can be vectorized
3740 by calling a simd clone of the function.
3741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3742 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3744
3745static bool
355fe088 3746vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
3747 gimple **vec_stmt, slp_tree slp_node,
3748 stmt_vector_for_cost *)
0136f8f0
AH
3749{
3750 tree vec_dest;
3751 tree scalar_dest;
3752 tree op, type;
3753 tree vec_oprnd0 = NULL_TREE;
3754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3755 tree vectype;
3756 unsigned int nunits;
3757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3759 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3760 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3761 tree fndecl, new_temp;
355fe088 3762 gimple *new_stmt = NULL;
0136f8f0 3763 int ncopies, j;
00426f9a 3764 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3765 vec<tree> vargs = vNULL;
3766 size_t i, nargs;
3767 tree lhs, rtype, ratype;
e7a74006 3768 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3769
3770 /* Is STMT a vectorizable call? */
3771 if (!is_gimple_call (stmt))
3772 return false;
3773
3774 fndecl = gimple_call_fndecl (stmt);
3775 if (fndecl == NULL_TREE)
3776 return false;
3777
d52f5295 3778 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3779 if (node == NULL || node->simd_clones == NULL)
3780 return false;
3781
3782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3783 return false;
3784
66c16fd9
RB
3785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3786 && ! vec_stmt)
0136f8f0
AH
3787 return false;
3788
3789 if (gimple_call_lhs (stmt)
3790 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3791 return false;
3792
3793 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3794
3795 vectype = STMT_VINFO_VECTYPE (stmt_info);
3796
3797 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3798 return false;
3799
3800 /* FORNOW */
fce57248 3801 if (slp_node)
0136f8f0
AH
3802 return false;
3803
3804 /* Process function arguments. */
3805 nargs = gimple_call_num_args (stmt);
3806
3807 /* Bail out if the function has zero arguments. */
3808 if (nargs == 0)
3809 return false;
3810
00426f9a 3811 arginfo.reserve (nargs, true);
0136f8f0
AH
3812
3813 for (i = 0; i < nargs; i++)
3814 {
3815 simd_call_arg_info thisarginfo;
3816 affine_iv iv;
3817
3818 thisarginfo.linear_step = 0;
3819 thisarginfo.align = 0;
3820 thisarginfo.op = NULL_TREE;
17b658af 3821 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3822
3823 op = gimple_call_arg (stmt, i);
894dd753 3824 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
81c40241 3825 &thisarginfo.vectype)
0136f8f0
AH
3826 || thisarginfo.dt == vect_uninitialized_def)
3827 {
3828 if (dump_enabled_p ())
3829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3830 "use not simple.\n");
0136f8f0
AH
3831 return false;
3832 }
3833
3834 if (thisarginfo.dt == vect_constant_def
3835 || thisarginfo.dt == vect_external_def)
3836 gcc_assert (thisarginfo.vectype == NULL_TREE);
3837 else
3838 gcc_assert (thisarginfo.vectype != NULL_TREE);
3839
6c9e85fb
JJ
3840 /* For linear arguments, the analyze phase should have saved
3841 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3842 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3843 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3844 {
3845 gcc_assert (vec_stmt);
3846 thisarginfo.linear_step
17b658af 3847 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3848 thisarginfo.op
17b658af
JJ
3849 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3850 thisarginfo.simd_lane_linear
3851 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3852 == boolean_true_node);
6c9e85fb
JJ
3853 /* If the loop has been peeled for alignment, we need to adjust it. */
3854 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3855 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3856 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3857 {
3858 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3859 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3860 tree opt = TREE_TYPE (thisarginfo.op);
3861 bias = fold_convert (TREE_TYPE (step), bias);
3862 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3863 thisarginfo.op
3864 = fold_build2 (POINTER_TYPE_P (opt)
3865 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3866 thisarginfo.op, bias);
3867 }
3868 }
3869 else if (!vec_stmt
3870 && thisarginfo.dt != vect_constant_def
3871 && thisarginfo.dt != vect_external_def
3872 && loop_vinfo
3873 && TREE_CODE (op) == SSA_NAME
3874 && simple_iv (loop, loop_containing_stmt (stmt), op,
3875 &iv, false)
3876 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3877 {
3878 thisarginfo.linear_step = tree_to_shwi (iv.step);
3879 thisarginfo.op = iv.base;
3880 }
3881 else if ((thisarginfo.dt == vect_constant_def
3882 || thisarginfo.dt == vect_external_def)
3883 && POINTER_TYPE_P (TREE_TYPE (op)))
3884 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3885 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3886 linear too. */
3887 if (POINTER_TYPE_P (TREE_TYPE (op))
3888 && !thisarginfo.linear_step
3889 && !vec_stmt
3890 && thisarginfo.dt != vect_constant_def
3891 && thisarginfo.dt != vect_external_def
3892 && loop_vinfo
3893 && !slp_node
3894 && TREE_CODE (op) == SSA_NAME)
3895 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3896
3897 arginfo.quick_push (thisarginfo);
3898 }
3899
d9f21f6a
RS
3900 unsigned HOST_WIDE_INT vf;
3901 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3902 {
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "not considering SIMD clones; not yet supported"
3906 " for variable-width vectors.\n");
3907 return false;
3908 }
3909
0136f8f0
AH
3910 unsigned int badness = 0;
3911 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3912 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3913 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3914 else
3915 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3916 n = n->simdclone->next_clone)
3917 {
3918 unsigned int this_badness = 0;
d9f21f6a 3919 if (n->simdclone->simdlen > vf
0136f8f0
AH
3920 || n->simdclone->nargs != nargs)
3921 continue;
d9f21f6a
RS
3922 if (n->simdclone->simdlen < vf)
3923 this_badness += (exact_log2 (vf)
0136f8f0
AH
3924 - exact_log2 (n->simdclone->simdlen)) * 1024;
3925 if (n->simdclone->inbranch)
3926 this_badness += 2048;
3927 int target_badness = targetm.simd_clone.usable (n);
3928 if (target_badness < 0)
3929 continue;
3930 this_badness += target_badness * 512;
3931 /* FORNOW: Have to add code to add the mask argument. */
3932 if (n->simdclone->inbranch)
3933 continue;
3934 for (i = 0; i < nargs; i++)
3935 {
3936 switch (n->simdclone->args[i].arg_type)
3937 {
3938 case SIMD_CLONE_ARG_TYPE_VECTOR:
3939 if (!useless_type_conversion_p
3940 (n->simdclone->args[i].orig_type,
3941 TREE_TYPE (gimple_call_arg (stmt, i))))
3942 i = -1;
3943 else if (arginfo[i].dt == vect_constant_def
3944 || arginfo[i].dt == vect_external_def
3945 || arginfo[i].linear_step)
3946 this_badness += 64;
3947 break;
3948 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3949 if (arginfo[i].dt != vect_constant_def
3950 && arginfo[i].dt != vect_external_def)
3951 i = -1;
3952 break;
3953 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3954 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3955 if (arginfo[i].dt == vect_constant_def
3956 || arginfo[i].dt == vect_external_def
3957 || (arginfo[i].linear_step
3958 != n->simdclone->args[i].linear_step))
3959 i = -1;
3960 break;
3961 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3962 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3963 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3964 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3965 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3966 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3967 /* FORNOW */
3968 i = -1;
3969 break;
3970 case SIMD_CLONE_ARG_TYPE_MASK:
3971 gcc_unreachable ();
3972 }
3973 if (i == (size_t) -1)
3974 break;
3975 if (n->simdclone->args[i].alignment > arginfo[i].align)
3976 {
3977 i = -1;
3978 break;
3979 }
3980 if (arginfo[i].align)
3981 this_badness += (exact_log2 (arginfo[i].align)
3982 - exact_log2 (n->simdclone->args[i].alignment));
3983 }
3984 if (i == (size_t) -1)
3985 continue;
3986 if (bestn == NULL || this_badness < badness)
3987 {
3988 bestn = n;
3989 badness = this_badness;
3990 }
3991 }
3992
3993 if (bestn == NULL)
00426f9a 3994 return false;
0136f8f0
AH
3995
3996 for (i = 0; i < nargs; i++)
3997 if ((arginfo[i].dt == vect_constant_def
3998 || arginfo[i].dt == vect_external_def)
3999 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4000 {
4001 arginfo[i].vectype
4002 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4003 i)));
4004 if (arginfo[i].vectype == NULL
cf1b2ba4 4005 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 4006 > bestn->simdclone->simdlen))
00426f9a 4007 return false;
0136f8f0
AH
4008 }
4009
4010 fndecl = bestn->decl;
4011 nunits = bestn->simdclone->simdlen;
d9f21f6a 4012 ncopies = vf / nunits;
0136f8f0
AH
4013
4014 /* If the function isn't const, only allow it in simd loops where user
4015 has asserted that at least nunits consecutive iterations can be
4016 performed using SIMD instructions. */
4017 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4018 && gimple_vuse (stmt))
00426f9a 4019 return false;
0136f8f0
AH
4020
4021 /* Sanity check: make sure that at least one copy of the vectorized stmt
4022 needs to be generated. */
4023 gcc_assert (ncopies >= 1);
4024
4025 if (!vec_stmt) /* transformation not required. */
4026 {
6c9e85fb
JJ
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4028 for (i = 0; i < nargs; i++)
7adb26f2
JJ
4029 if ((bestn->simdclone->args[i].arg_type
4030 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4031 || (bestn->simdclone->args[i].arg_type
4032 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 4033 {
17b658af 4034 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
4035 + 1);
4036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4037 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4038 ? size_type_node : TREE_TYPE (arginfo[i].op);
4039 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4040 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4041 tree sll = arginfo[i].simd_lane_linear
4042 ? boolean_true_node : boolean_false_node;
4043 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4044 }
0136f8f0 4045 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
adac3a68 4046 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
68435eb2 4047/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4048 return true;
4049 }
4050
67b8dbac 4051 /* Transform. */
0136f8f0
AH
4052
4053 if (dump_enabled_p ())
4054 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4055
4056 /* Handle def. */
4057 scalar_dest = gimple_call_lhs (stmt);
4058 vec_dest = NULL_TREE;
4059 rtype = NULL_TREE;
4060 ratype = NULL_TREE;
4061 if (scalar_dest)
4062 {
4063 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4064 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4065 if (TREE_CODE (rtype) == ARRAY_TYPE)
4066 {
4067 ratype = rtype;
4068 rtype = TREE_TYPE (ratype);
4069 }
4070 }
4071
4072 prev_stmt_info = NULL;
4073 for (j = 0; j < ncopies; ++j)
4074 {
4075 /* Build argument list for the vectorized call. */
4076 if (j == 0)
4077 vargs.create (nargs);
4078 else
4079 vargs.truncate (0);
4080
4081 for (i = 0; i < nargs; i++)
4082 {
4083 unsigned int k, l, m, o;
4084 tree atype;
4085 op = gimple_call_arg (stmt, i);
4086 switch (bestn->simdclone->args[i].arg_type)
4087 {
4088 case SIMD_CLONE_ARG_TYPE_VECTOR:
4089 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4090 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
4091 for (m = j * o; m < (j + 1) * o; m++)
4092 {
cf1b2ba4
RS
4093 if (simd_clone_subparts (atype)
4094 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4095 {
73a699ae 4096 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4097 k = (simd_clone_subparts (arginfo[i].vectype)
4098 / simd_clone_subparts (atype));
0136f8f0
AH
4099 gcc_assert ((k & (k - 1)) == 0);
4100 if (m == 0)
4101 vec_oprnd0
81c40241 4102 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4103 else
4104 {
4105 vec_oprnd0 = arginfo[i].op;
4106 if ((m & (k - 1)) == 0)
4107 vec_oprnd0
4108 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4109 vec_oprnd0);
4110 }
4111 arginfo[i].op = vec_oprnd0;
4112 vec_oprnd0
4113 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4114 bitsize_int (prec),
0136f8f0
AH
4115 bitsize_int ((m & (k - 1)) * prec));
4116 new_stmt
b731b390 4117 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4118 vec_oprnd0);
4119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4120 vargs.safe_push (gimple_assign_lhs (new_stmt));
4121 }
4122 else
4123 {
cf1b2ba4
RS
4124 k = (simd_clone_subparts (atype)
4125 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4126 gcc_assert ((k & (k - 1)) == 0);
4127 vec<constructor_elt, va_gc> *ctor_elts;
4128 if (k != 1)
4129 vec_alloc (ctor_elts, k);
4130 else
4131 ctor_elts = NULL;
4132 for (l = 0; l < k; l++)
4133 {
4134 if (m == 0 && l == 0)
4135 vec_oprnd0
81c40241 4136 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4137 else
4138 vec_oprnd0
4139 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4140 arginfo[i].op);
4141 arginfo[i].op = vec_oprnd0;
4142 if (k == 1)
4143 break;
4144 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4145 vec_oprnd0);
4146 }
4147 if (k == 1)
4148 vargs.safe_push (vec_oprnd0);
4149 else
4150 {
4151 vec_oprnd0 = build_constructor (atype, ctor_elts);
4152 new_stmt
b731b390 4153 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4154 vec_oprnd0);
4155 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156 vargs.safe_push (gimple_assign_lhs (new_stmt));
4157 }
4158 }
4159 }
4160 break;
4161 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4162 vargs.safe_push (op);
4163 break;
4164 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4165 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4166 if (j == 0)
4167 {
4168 gimple_seq stmts;
4169 arginfo[i].op
4170 = force_gimple_operand (arginfo[i].op, &stmts, true,
4171 NULL_TREE);
4172 if (stmts != NULL)
4173 {
4174 basic_block new_bb;
4175 edge pe = loop_preheader_edge (loop);
4176 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4177 gcc_assert (!new_bb);
4178 }
17b658af
JJ
4179 if (arginfo[i].simd_lane_linear)
4180 {
4181 vargs.safe_push (arginfo[i].op);
4182 break;
4183 }
b731b390 4184 tree phi_res = copy_ssa_name (op);
538dd0b7 4185 gphi *new_phi = create_phi_node (phi_res, loop->header);
4fbeb363 4186 loop_vinfo->add_stmt (new_phi);
0136f8f0
AH
4187 add_phi_arg (new_phi, arginfo[i].op,
4188 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4189 enum tree_code code
4190 = POINTER_TYPE_P (TREE_TYPE (op))
4191 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4192 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4193 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4194 widest_int cst
4195 = wi::mul (bestn->simdclone->args[i].linear_step,
4196 ncopies * nunits);
4197 tree tcst = wide_int_to_tree (type, cst);
b731b390 4198 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
4199 new_stmt
4200 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4201 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4202 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4fbeb363 4203 loop_vinfo->add_stmt (new_stmt);
0136f8f0
AH
4204 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4205 UNKNOWN_LOCATION);
4206 arginfo[i].op = phi_res;
4207 vargs.safe_push (phi_res);
4208 }
4209 else
4210 {
4211 enum tree_code code
4212 = POINTER_TYPE_P (TREE_TYPE (op))
4213 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4214 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4215 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4216 widest_int cst
4217 = wi::mul (bestn->simdclone->args[i].linear_step,
4218 j * nunits);
4219 tree tcst = wide_int_to_tree (type, cst);
b731b390 4220 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
4221 new_stmt = gimple_build_assign (new_temp, code,
4222 arginfo[i].op, tcst);
0136f8f0
AH
4223 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4224 vargs.safe_push (new_temp);
4225 }
4226 break;
7adb26f2
JJ
4227 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4228 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4229 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4230 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4231 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4232 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4233 default:
4234 gcc_unreachable ();
4235 }
4236 }
4237
4238 new_stmt = gimple_build_call_vec (fndecl, vargs);
4239 if (vec_dest)
4240 {
cf1b2ba4 4241 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4242 if (ratype)
b731b390 4243 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4244 else if (simd_clone_subparts (vectype)
4245 == simd_clone_subparts (rtype))
0136f8f0
AH
4246 new_temp = make_ssa_name (vec_dest, new_stmt);
4247 else
4248 new_temp = make_ssa_name (rtype, new_stmt);
4249 gimple_call_set_lhs (new_stmt, new_temp);
4250 }
4251 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4252
4253 if (vec_dest)
4254 {
cf1b2ba4 4255 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4256 {
4257 unsigned int k, l;
73a699ae
RS
4258 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4259 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4260 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4261 gcc_assert ((k & (k - 1)) == 0);
4262 for (l = 0; l < k; l++)
4263 {
4264 tree t;
4265 if (ratype)
4266 {
4267 t = build_fold_addr_expr (new_temp);
4268 t = build2 (MEM_REF, vectype, t,
73a699ae 4269 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4270 }
4271 else
4272 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4273 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 4274 new_stmt
b731b390 4275 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
4276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4277 if (j == 0 && l == 0)
4278 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4279 else
4280 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4281
4282 prev_stmt_info = vinfo_for_stmt (new_stmt);
4283 }
4284
4285 if (ratype)
3ba4ff41 4286 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4287 continue;
4288 }
cf1b2ba4 4289 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4290 {
cf1b2ba4
RS
4291 unsigned int k = (simd_clone_subparts (vectype)
4292 / simd_clone_subparts (rtype));
0136f8f0
AH
4293 gcc_assert ((k & (k - 1)) == 0);
4294 if ((j & (k - 1)) == 0)
4295 vec_alloc (ret_ctor_elts, k);
4296 if (ratype)
4297 {
cf1b2ba4 4298 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4299 for (m = 0; m < o; m++)
4300 {
4301 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4302 size_int (m), NULL_TREE, NULL_TREE);
4303 new_stmt
b731b390 4304 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
4305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4306 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4307 gimple_assign_lhs (new_stmt));
4308 }
3ba4ff41 4309 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4310 }
4311 else
4312 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4313 if ((j & (k - 1)) != k - 1)
4314 continue;
4315 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4316 new_stmt
b731b390 4317 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
4318 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4319
4320 if ((unsigned) j == k - 1)
4321 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4322 else
4323 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4324
4325 prev_stmt_info = vinfo_for_stmt (new_stmt);
4326 continue;
4327 }
4328 else if (ratype)
4329 {
4330 tree t = build_fold_addr_expr (new_temp);
4331 t = build2 (MEM_REF, vectype, t,
4332 build_int_cst (TREE_TYPE (t), 0));
4333 new_stmt
b731b390 4334 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0 4335 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4336 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4337 }
4338 }
4339
4340 if (j == 0)
4341 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4342 else
4343 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4344
4345 prev_stmt_info = vinfo_for_stmt (new_stmt);
4346 }
4347
4348 vargs.release ();
4349
4350 /* The call in STMT might prevent it from being removed in DCE.
4351 However, we cannot remove it here, because of the way the SSA name
4352 it defines is mapped to the new definition. So just replace the
4353 rhs of the statement with something harmless. */
4354
4355 if (slp_node)
4356 return true;
4357
4358 if (scalar_dest)
4359 {
4360 type = TREE_TYPE (scalar_dest);
4361 if (is_pattern_stmt_p (stmt_info))
4362 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4363 else
4364 lhs = gimple_call_lhs (stmt);
4365 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4366 }
4367 else
4368 new_stmt = gimple_build_nop ();
4369 set_vinfo_for_stmt (new_stmt, stmt_info);
4370 set_vinfo_for_stmt (stmt, NULL);
4371 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4372 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4373 unlink_stmt_vdef (stmt);
4374
4375 return true;
4376}
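/* Usage sketch (illustrative only, not from the original source): given

     #pragma omp declare simd notinbranch
     float foo (float x, int y);

     for (i = 0; i < n; i++)
       a[i] = foo (b[i], c[i]);

   earlier passes have created simd clones of foo (e.g. a clone taking a
   vector of floats and a vector of ints).  vectorizable_simd_clone_call
   above picks the best-matching clone from node->simd_clones, scoring
   candidates by their simdlen relative to the vectorization factor, by
   inbranch requirements and by argument kinds, and then emits
   VF / simdlen calls to that clone in place of the scalar call.  */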
4377
4378
ebfd146a
IR
4379/* Function vect_gen_widened_results_half
4380
4381 Create a vector stmt whose code, number of arguments, and result
b8698a0f 4382 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
ff802fa1 4383 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
ebfd146a
IR
4384 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4385 needs to be created (DECL is a function-decl of a target-builtin).
4386 STMT is the original scalar stmt that we are vectorizing. */
4387
355fe088 4388static gimple *
ebfd146a
IR
4389vect_gen_widened_results_half (enum tree_code code,
4390 tree decl,
4391 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4392 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4393 gimple *stmt)
b8698a0f 4394{
355fe088 4395 gimple *new_stmt;
b8698a0f
L
4396 tree new_temp;
4397
4398 /* Generate half of the widened result: */
4399 if (code == CALL_EXPR)
4400 {
4401 /* Target specific support */
ebfd146a
IR
4402 if (op_type == binary_op)
4403 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4404 else
4405 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4406 new_temp = make_ssa_name (vec_dest, new_stmt);
4407 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4408 }
4409 else
ebfd146a 4410 {
b8698a0f
L
4411 /* Generic support */
4412 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4413 if (op_type != binary_op)
4414 vec_oprnd1 = NULL;
0d0e4a03 4415 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4416 new_temp = make_ssa_name (vec_dest, new_stmt);
4417 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4418 }
ebfd146a
IR
4419 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4420
ebfd146a
IR
4421 return new_stmt;
4422}
4423
4a00c761
JJ
4424
4425/* Get vectorized definitions for loop-based vectorization. For the first
4426 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4427 the scalar operand), and for the rest we get a copy with
4428 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4429 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4430 The vectors are collected into VEC_OPRNDS. */
4431
4432static void
355fe088 4433vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4434 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4435{
4436 tree vec_oprnd;
4437
4438 /* Get first vector operand. */
4439 /* All the vector operands except the very first one (that is scalar oprnd)
4440 are stmt copies. */
4441 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4442 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4443 else
4444 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4445
9771b263 4446 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4447
4448 /* Get second vector operand. */
4449 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4450 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4451
4452 *oprnd = vec_oprnd;
4453
4454 /* For conversion in multiple steps, continue to get operands
4455 recursively. */
4456 if (multi_step_cvt)
4457 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4458}
4459
4460
4461/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4462 For multi-step conversions store the resulting vectors and call the function
4463 recursively. */
4464
4465static void
9771b263 4466vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4467 int multi_step_cvt, gimple *stmt,
9771b263 4468 vec<tree> vec_dsts,
4a00c761
JJ
4469 gimple_stmt_iterator *gsi,
4470 slp_tree slp_node, enum tree_code code,
4471 stmt_vec_info *prev_stmt_info)
4472{
4473 unsigned int i;
4474 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4475 gimple *new_stmt;
4a00c761
JJ
4476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4477
9771b263 4478 vec_dest = vec_dsts.pop ();
4a00c761 4479
9771b263 4480 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4481 {
4482 /* Create demotion operation. */
9771b263
DN
4483 vop0 = (*vec_oprnds)[i];
4484 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4485 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4486 new_tmp = make_ssa_name (vec_dest, new_stmt);
4487 gimple_assign_set_lhs (new_stmt, new_tmp);
4488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4489
4490 if (multi_step_cvt)
4491 /* Store the resulting vector for next recursive call. */
9771b263 4492 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4493 else
4494 {
4495 /* This is the last step of the conversion sequence. Store the
4496 vectors in SLP_NODE or in vector info of the scalar statement
4497 (or in STMT_VINFO_RELATED_STMT chain). */
4498 if (slp_node)
9771b263 4499 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4500 else
c689ce1e
RB
4501 {
4502 if (!*prev_stmt_info)
4503 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4504 else
4505 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4506
c689ce1e
RB
4507 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4508 }
4a00c761
JJ
4509 }
4510 }
4511
4512 /* For multi-step demotion operations we first generate demotion operations
4513 from the source type to the intermediate types, and then combine the
4514 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4515 type. */
4516 if (multi_step_cvt)
4517 {
4518 /* At each level of recursion we have half of the operands we had at the
4519 previous level. */
9771b263 4520 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4521 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4522 stmt, vec_dsts, gsi, slp_node,
4523 VEC_PACK_TRUNC_EXPR,
4524 prev_stmt_info);
4525 }
4526
9771b263 4527 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4528}
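/* Illustrative example (assumption, not from the original source):
   narrowing int to char, e.g.

     c[i] = (char) a[i];

   with 16-byte vectors needs two demotion levels: four vector(4) int
   operands are first packed pairwise into two vector(8) short vectors
   with VEC_PACK_TRUNC_EXPR, and the recursive call above then packs
   those two into the final vector(16) char result.  */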
4529
4530
4531/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4532 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4533 the resulting vectors and call the function recursively. */
4534
4535static void
9771b263
DN
4536vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4537 vec<tree> *vec_oprnds1,
355fe088 4538 gimple *stmt, tree vec_dest,
4a00c761
JJ
4539 gimple_stmt_iterator *gsi,
4540 enum tree_code code1,
4541 enum tree_code code2, tree decl1,
4542 tree decl2, int op_type)
4543{
4544 int i;
4545 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4546 gimple *new_stmt1, *new_stmt2;
6e1aa848 4547 vec<tree> vec_tmp = vNULL;
4a00c761 4548
9771b263
DN
4549 vec_tmp.create (vec_oprnds0->length () * 2);
4550 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4551 {
4552 if (op_type == binary_op)
9771b263 4553 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4554 else
4555 vop1 = NULL_TREE;
4556
4557 /* Generate the two halves of promotion operation. */
4558 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4559 op_type, vec_dest, gsi, stmt);
4560 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4561 op_type, vec_dest, gsi, stmt);
4562 if (is_gimple_call (new_stmt1))
4563 {
4564 new_tmp1 = gimple_call_lhs (new_stmt1);
4565 new_tmp2 = gimple_call_lhs (new_stmt2);
4566 }
4567 else
4568 {
4569 new_tmp1 = gimple_assign_lhs (new_stmt1);
4570 new_tmp2 = gimple_assign_lhs (new_stmt2);
4571 }
4572
4573 /* Store the results for the next step. */
9771b263
DN
4574 vec_tmp.quick_push (new_tmp1);
4575 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4576 }
4577
689eaba3 4578 vec_oprnds0->release ();
4a00c761
JJ
4579 *vec_oprnds0 = vec_tmp;
4580}
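/* Illustrative example (assumption, not from the original source):
   widening short to int, e.g.

     b[i] = (int) s[i];

   each vector(8) short operand yields two vector(4) int results; the two
   halves are typically VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR (or a
   pair of target builtins when the codes are CALL_EXPR), and both are
   pushed so the caller sees twice as many output vectors as inputs.  */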
4581
4582
b8698a0f
L
4583/* Check if STMT performs a conversion operation that can be vectorized.
4584 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4585 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4586 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4587
4588static bool
355fe088 4589vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
4590 gimple **vec_stmt, slp_tree slp_node,
4591 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4592{
4593 tree vec_dest;
4594 tree scalar_dest;
4a00c761 4595 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4596 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4598 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4599 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4600 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4601 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4602 tree new_temp;
ebfd146a 4603 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4604 int ndts = 2;
355fe088 4605 gimple *new_stmt = NULL;
ebfd146a 4606 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4607 poly_uint64 nunits_in;
4608 poly_uint64 nunits_out;
ebfd146a 4609 tree vectype_out, vectype_in;
4a00c761
JJ
4610 int ncopies, i, j;
4611 tree lhs_type, rhs_type;
ebfd146a 4612 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4613 vec<tree> vec_oprnds0 = vNULL;
4614 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4615 tree vop0;
4a00c761 4616 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4617 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4618 int multi_step_cvt = 0;
6e1aa848 4619 vec<tree> interm_types = vNULL;
4a00c761
JJ
4620 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4621 int op_type;
4a00c761 4622 unsigned short fltsz;
ebfd146a
IR
4623
4624 /* Is STMT a vectorizable conversion? */
4625
4a00c761 4626 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4627 return false;
4628
66c16fd9
RB
4629 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4630 && ! vec_stmt)
ebfd146a
IR
4631 return false;
4632
4633 if (!is_gimple_assign (stmt))
4634 return false;
4635
4636 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4637 return false;
4638
4639 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4640 if (!CONVERT_EXPR_CODE_P (code)
4641 && code != FIX_TRUNC_EXPR
4642 && code != FLOAT_EXPR
4643 && code != WIDEN_MULT_EXPR
4644 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4645 return false;
4646
4a00c761
JJ
4647 op_type = TREE_CODE_LENGTH (code);
4648
ebfd146a 4649 /* Check types of lhs and rhs. */
b690cc0f 4650 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4651 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4652 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4653
ebfd146a
IR
4654 op0 = gimple_assign_rhs1 (stmt);
4655 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4656
4657 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4658 && !((INTEGRAL_TYPE_P (lhs_type)
4659 && INTEGRAL_TYPE_P (rhs_type))
4660 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4661 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4662 return false;
4663
e6f5c25d
IE
4664 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4665 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4666 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4667 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4668 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4669 {
73fbfcad 4670 if (dump_enabled_p ())
78c60e3d 4671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4672 "type conversion to/from bit-precision unsupported."
4673 "\n");
4a00c761
JJ
4674 return false;
4675 }
4676
b690cc0f 4677 /* Check the operands of the operation. */
894dd753 4678 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4679 {
73fbfcad 4680 if (dump_enabled_p ())
78c60e3d 4681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4682 "use not simple.\n");
b690cc0f
RG
4683 return false;
4684 }
4a00c761
JJ
4685 if (op_type == binary_op)
4686 {
4687 bool ok;
4688
4689 op1 = gimple_assign_rhs2 (stmt);
4690 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4691 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4692 OP1. */
4693 if (CONSTANT_CLASS_P (op0))
894dd753 4694 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4695 else
894dd753 4696 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4697
4698 if (!ok)
4699 {
73fbfcad 4700 if (dump_enabled_p ())
78c60e3d 4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4702 "use not simple.\n");
4a00c761
JJ
4703 return false;
4704 }
4705 }
4706
b690cc0f
RG
4707 /* If op0 is an external or constant def, use a vector type of
4708 the same size as the output vector type. */
ebfd146a 4709 if (!vectype_in)
b690cc0f 4710 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4711 if (vec_stmt)
4712 gcc_assert (vectype_in);
4713 if (!vectype_in)
4714 {
73fbfcad 4715 if (dump_enabled_p ())
4a00c761 4716 {
78c60e3d
SS
4717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4718 "no vectype for scalar type ");
4719 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4720 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4721 }
7d8930a0
IR
4722
4723 return false;
4724 }
ebfd146a 4725
e6f5c25d
IE
4726 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4727 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4728 {
4729 if (dump_enabled_p ())
4730 {
4731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4732 "can't convert between boolean and non "
4733 "boolean vectors");
4734 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4735 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4736 }
4737
4738 return false;
4739 }
4740
b690cc0f
RG
4741 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4742 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4743 if (known_eq (nunits_out, nunits_in))
ebfd146a 4744 modifier = NONE;
062d5ccc
RS
4745 else if (multiple_p (nunits_out, nunits_in))
4746 modifier = NARROW;
ebfd146a 4747 else
062d5ccc
RS
4748 {
4749 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4750 modifier = WIDEN;
4751 }
ebfd146a 4752
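  /* For instance, with 128-bit vectors a float -> double conversion has
     nunits_in == 4 and nunits_out == 2, so the modifier is WIDEN;
     double -> float gives NARROW; int -> float keeps the element count
     and gives NONE.  */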
ff802fa1
IR
4753 /* Multiple types in SLP are handled by creating the appropriate number of
4754 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4755 case of SLP. */
fce57248 4756 if (slp_node)
ebfd146a 4757 ncopies = 1;
4a00c761 4758 else if (modifier == NARROW)
e8f142e2 4759 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4760 else
e8f142e2 4761 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4762
ebfd146a
IR
4763 /* Sanity check: make sure that at least one copy of the vectorized stmt
4764 needs to be generated. */
4765 gcc_assert (ncopies >= 1);
4766
16d22000
RS
4767 bool found_mode = false;
4768 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4769 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4770 opt_scalar_mode rhs_mode_iter;
b397965c 4771
ebfd146a 4772 /* Supportable by target? */
4a00c761 4773 switch (modifier)
ebfd146a 4774 {
4a00c761
JJ
4775 case NONE:
4776 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4777 return false;
4778 if (supportable_convert_operation (code, vectype_out, vectype_in,
4779 &decl1, &code1))
4780 break;
4781 /* FALLTHRU */
4782 unsupported:
73fbfcad 4783 if (dump_enabled_p ())
78c60e3d 4784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4785 "conversion not supported by target.\n");
ebfd146a 4786 return false;
ebfd146a 4787
4a00c761
JJ
4788 case WIDEN:
4789 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4790 &code1, &code2, &multi_step_cvt,
4791 &interm_types))
4a00c761
JJ
4792 {
4793 /* Binary widening operation can only be supported directly by the
4794 architecture. */
4795 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4796 break;
4797 }
4798
4799 if (code != FLOAT_EXPR
b397965c 4800 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4801 goto unsupported;
4802
b397965c 4803 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4804 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4805 {
16d22000 4806 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4807 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4808 break;
4809
4a00c761
JJ
4810 cvt_type
4811 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4812 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4813 if (cvt_type == NULL_TREE)
4814 goto unsupported;
4815
4816 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4817 {
4818 if (!supportable_convert_operation (code, vectype_out,
4819 cvt_type, &decl1, &codecvt1))
4820 goto unsupported;
4821 }
4822 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4823 cvt_type, &codecvt1,
4824 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4825 &interm_types))
4826 continue;
4827 else
4828 gcc_assert (multi_step_cvt == 0);
4829
4830 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4831 vectype_in, &code1, &code2,
4832 &multi_step_cvt, &interm_types))
16d22000
RS
4833 {
4834 found_mode = true;
4835 break;
4836 }
4a00c761
JJ
4837 }
4838
16d22000 4839 if (!found_mode)
4a00c761
JJ
4840 goto unsupported;
4841
4842 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4843 codecvt2 = ERROR_MARK;
4844 else
4845 {
4846 multi_step_cvt++;
9771b263 4847 interm_types.safe_push (cvt_type);
4a00c761
JJ
4848 cvt_type = NULL_TREE;
4849 }
4850 break;
4851
4852 case NARROW:
4853 gcc_assert (op_type == unary_op);
4854 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4855 &code1, &multi_step_cvt,
4856 &interm_types))
4857 break;
4858
4859 if (code != FIX_TRUNC_EXPR
b397965c 4860 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4861 goto unsupported;
4862
4a00c761
JJ
4863 cvt_type
4864 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4865 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4866 if (cvt_type == NULL_TREE)
4867 goto unsupported;
4868 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4869 &decl1, &codecvt1))
4870 goto unsupported;
4871 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4872 &code1, &multi_step_cvt,
4873 &interm_types))
4874 break;
4875 goto unsupported;
4876
4877 default:
4878 gcc_unreachable ();
ebfd146a
IR
4879 }
4880
4881 if (!vec_stmt) /* transformation not required. */
4882 {
adac3a68 4883 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4884 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4885 {
4886 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4887 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4888 cost_vec);
8bd37302 4889 }
4a00c761
JJ
4890 else if (modifier == NARROW)
4891 {
4892 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4893 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4894 cost_vec);
4a00c761
JJ
4895 }
4896 else
4897 {
4898 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4899 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4900 cost_vec);
4a00c761 4901 }
9771b263 4902 interm_types.release ();
ebfd146a
IR
4903 return true;
4904 }
4905
67b8dbac 4906 /* Transform. */
73fbfcad 4907 if (dump_enabled_p ())
78c60e3d 4908 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4909 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4910
4a00c761
JJ
4911 if (op_type == binary_op)
4912 {
4913 if (CONSTANT_CLASS_P (op0))
4914 op0 = fold_convert (TREE_TYPE (op1), op0);
4915 else if (CONSTANT_CLASS_P (op1))
4916 op1 = fold_convert (TREE_TYPE (op0), op1);
4917 }
4918
4919 /* In case of multi-step conversion, we first generate conversion operations
4920 to the intermediate types, and then from those types to the final one.
4921 We create vector destinations for the intermediate type (TYPES) received
4922 from supportable_*_operation, and store them in the correct order
4923 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4924 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4925 vec_dest = vect_create_destination_var (scalar_dest,
4926 (cvt_type && modifier == WIDEN)
4927 ? cvt_type : vectype_out);
9771b263 4928 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4929
4930 if (multi_step_cvt)
4931 {
9771b263
DN
4932 for (i = interm_types.length () - 1;
4933 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4934 {
4935 vec_dest = vect_create_destination_var (scalar_dest,
4936 intermediate_type);
9771b263 4937 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4938 }
4939 }
ebfd146a 4940
4a00c761 4941 if (cvt_type)
82294ec1
JJ
4942 vec_dest = vect_create_destination_var (scalar_dest,
4943 modifier == WIDEN
4944 ? vectype_out : cvt_type);
4a00c761
JJ
4945
4946 if (!slp_node)
4947 {
30862efc 4948 if (modifier == WIDEN)
4a00c761 4949 {
c3284718 4950 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4951 if (op_type == binary_op)
9771b263 4952 vec_oprnds1.create (1);
4a00c761 4953 }
30862efc 4954 else if (modifier == NARROW)
9771b263
DN
4955 vec_oprnds0.create (
4956 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4957 }
4958 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4959 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4960
4a00c761 4961 last_oprnd = op0;
ebfd146a
IR
4962 prev_stmt_info = NULL;
4963 switch (modifier)
4964 {
4965 case NONE:
4966 for (j = 0; j < ncopies; j++)
4967 {
ebfd146a 4968 if (j == 0)
306b0c92 4969 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4970 else
4971 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4972
9771b263 4973 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4974 {
4975 /* Arguments are ready, create the new vector stmt. */
4976 if (code1 == CALL_EXPR)
4977 {
4978 new_stmt = gimple_build_call (decl1, 1, vop0);
4979 new_temp = make_ssa_name (vec_dest, new_stmt);
4980 gimple_call_set_lhs (new_stmt, new_temp);
4981 }
4982 else
4983 {
4984 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4985 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4986 new_temp = make_ssa_name (vec_dest, new_stmt);
4987 gimple_assign_set_lhs (new_stmt, new_temp);
4988 }
4989
4990 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4991 if (slp_node)
9771b263 4992 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4993 else
4994 {
4995 if (!prev_stmt_info)
4996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4997 else
4998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4999 prev_stmt_info = vinfo_for_stmt (new_stmt);
5000 }
4a00c761 5001 }
ebfd146a
IR
5002 }
5003 break;
5004
5005 case WIDEN:
5006 /* In case the vectorization factor (VF) is bigger than the number
5007 of elements that we can fit in a vectype (nunits), we have to
5008 generate more than one vector stmt - i.e. - we need to "unroll"
5009 the vector stmt by a factor VF/nunits. */
5010 for (j = 0; j < ncopies; j++)
5011 {
4a00c761 5012 /* Handle uses. */
ebfd146a 5013 if (j == 0)
4a00c761
JJ
5014 {
5015 if (slp_node)
5016 {
5017 if (code == WIDEN_LSHIFT_EXPR)
5018 {
5019 unsigned int k;
ebfd146a 5020
4a00c761
JJ
5021 vec_oprnd1 = op1;
5022 /* Store vec_oprnd1 for every vector stmt to be created
5023 for SLP_NODE. We check during the analysis that all
5024 the shift arguments are the same. */
5025 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5026 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5027
5028 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5029 slp_node);
4a00c761
JJ
5030 }
5031 else
5032 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 5033 &vec_oprnds1, slp_node);
4a00c761
JJ
5034 }
5035 else
5036 {
81c40241 5037 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5038 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5039 if (op_type == binary_op)
5040 {
5041 if (code == WIDEN_LSHIFT_EXPR)
5042 vec_oprnd1 = op1;
5043 else
81c40241 5044 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5045 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5046 }
5047 }
5048 }
ebfd146a 5049 else
4a00c761
JJ
5050 {
5051 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5052 vec_oprnds0.truncate (0);
5053 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5054 if (op_type == binary_op)
5055 {
5056 if (code == WIDEN_LSHIFT_EXPR)
5057 vec_oprnd1 = op1;
5058 else
5059 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5060 vec_oprnd1);
9771b263
DN
5061 vec_oprnds1.truncate (0);
5062 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5063 }
5064 }
ebfd146a 5065
4a00c761
JJ
5066 /* Arguments are ready. Create the new vector stmts. */
5067 for (i = multi_step_cvt; i >= 0; i--)
5068 {
9771b263 5069 tree this_dest = vec_dsts[i];
4a00c761
JJ
5070 enum tree_code c1 = code1, c2 = code2;
5071 if (i == 0 && codecvt2 != ERROR_MARK)
5072 {
5073 c1 = codecvt1;
5074 c2 = codecvt2;
5075 }
5076 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5077 &vec_oprnds1,
5078 stmt, this_dest, gsi,
5079 c1, c2, decl1, decl2,
5080 op_type);
5081 }
5082
9771b263 5083 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5084 {
5085 if (cvt_type)
5086 {
5087 if (codecvt1 == CALL_EXPR)
5088 {
5089 new_stmt = gimple_build_call (decl1, 1, vop0);
5090 new_temp = make_ssa_name (vec_dest, new_stmt);
5091 gimple_call_set_lhs (new_stmt, new_temp);
5092 }
5093 else
5094 {
5095 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5096 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5097 new_stmt = gimple_build_assign (new_temp, codecvt1,
5098 vop0);
4a00c761
JJ
5099 }
5100
5101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5102 }
5103 else
5104 new_stmt = SSA_NAME_DEF_STMT (vop0);
5105
5106 if (slp_node)
9771b263 5107 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 5108 else
c689ce1e
RB
5109 {
5110 if (!prev_stmt_info)
5111 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5112 else
5113 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5114 prev_stmt_info = vinfo_for_stmt (new_stmt);
5115 }
4a00c761 5116 }
ebfd146a 5117 }
4a00c761
JJ
5118
5119 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5120 break;
5121
5122 case NARROW:
5123 /* In case the vectorization factor (VF) is bigger than the number
5124 of elements that we can fit in a vectype (nunits), we have to
5125 generate more than one vector stmt - i.e. - we need to "unroll"
5126 the vector stmt by a factor VF/nunits. */
5127 for (j = 0; j < ncopies; j++)
5128 {
5129 /* Handle uses. */
4a00c761
JJ
5130 if (slp_node)
5131 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5132 slp_node);
ebfd146a
IR
5133 else
5134 {
9771b263 5135 vec_oprnds0.truncate (0);
4a00c761
JJ
5136 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5137 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5138 }
5139
4a00c761
JJ
5140 /* Arguments are ready. Create the new vector stmts. */
5141 if (cvt_type)
9771b263 5142 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5143 {
5144 if (codecvt1 == CALL_EXPR)
5145 {
5146 new_stmt = gimple_build_call (decl1, 1, vop0);
5147 new_temp = make_ssa_name (vec_dest, new_stmt);
5148 gimple_call_set_lhs (new_stmt, new_temp);
5149 }
5150 else
5151 {
5152 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5153 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5154 new_stmt = gimple_build_assign (new_temp, codecvt1,
5155 vop0);
4a00c761 5156 }
ebfd146a 5157
4a00c761 5158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 5159 vec_oprnds0[i] = new_temp;
4a00c761 5160 }
ebfd146a 5161
4a00c761
JJ
5162 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5163 stmt, vec_dsts, gsi,
5164 slp_node, code1,
5165 &prev_stmt_info);
ebfd146a
IR
5166 }
5167
5168 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5169 break;
ebfd146a
IR
5170 }
5171
9771b263
DN
5172 vec_oprnds0.release ();
5173 vec_oprnds1.release ();
9771b263 5174 interm_types.release ();
ebfd146a
IR
5175
5176 return true;
5177}
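
/* An illustrative example (an informal sketch, not taken from the original
   sources, assuming a 128-bit vector target; names are made up).  Each
   loop below is a candidate for vectorizable_conversion, annotated with
   the modifier the analysis above would choose:

     void
     convert (float *restrict f, double *restrict d, int *restrict i32,
              short *restrict i16, int n)
     {
       for (int j = 0; j < n; j++)
         f[j] = (float) i32[j];         // NONE:   V4SI -> V4SF
       for (int j = 0; j < n; j++)
         d[j] = (double) f[j];          // WIDEN:  one V4SF -> two V2DF
       for (int j = 0; j < n; j++)
         i16[j] = (short) i32[j];       // NARROW: two V4SI -> one V8HI
     }
*/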
ff802fa1
IR
5178
5179
ebfd146a
IR
5180/* Function vectorizable_assignment.
5181
b8698a0f
L
5182 Check if STMT performs an assignment (copy) that can be vectorized.
5183 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5184 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5185 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5186
5187static bool
355fe088 5188vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5189 gimple **vec_stmt, slp_tree slp_node,
5190 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5191{
5192 tree vec_dest;
5193 tree scalar_dest;
5194 tree op;
5195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5196 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5197 tree new_temp;
4fc5ebf1
JG
5198 enum vect_def_type dt[1] = {vect_unknown_def_type};
5199 int ndts = 1;
ebfd146a 5200 int ncopies;
f18b55bd 5201 int i, j;
6e1aa848 5202 vec<tree> vec_oprnds = vNULL;
ebfd146a 5203 tree vop;
a70d6342 5204 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5205 vec_info *vinfo = stmt_info->vinfo;
355fe088 5206 gimple *new_stmt = NULL;
f18b55bd 5207 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5208 enum tree_code code;
5209 tree vectype_in;
ebfd146a 5210
a70d6342 5211 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5212 return false;
5213
66c16fd9
RB
5214 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5215 && ! vec_stmt)
ebfd146a
IR
5216 return false;
5217
5218 /* Is vectorizable assignment? */
5219 if (!is_gimple_assign (stmt))
5220 return false;
5221
5222 scalar_dest = gimple_assign_lhs (stmt);
5223 if (TREE_CODE (scalar_dest) != SSA_NAME)
5224 return false;
5225
fde9c428 5226 code = gimple_assign_rhs_code (stmt);
ebfd146a 5227 if (gimple_assign_single_p (stmt)
fde9c428
RG
5228 || code == PAREN_EXPR
5229 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5230 op = gimple_assign_rhs1 (stmt);
5231 else
5232 return false;
5233
7b7ec6c5
RG
5234 if (code == VIEW_CONVERT_EXPR)
5235 op = TREE_OPERAND (op, 0);
5236
465c8c19 5237 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5238 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5239
5240 /* Multiple types in SLP are handled by creating the appropriate number of
5241 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5242 case of SLP. */
fce57248 5243 if (slp_node)
465c8c19
JJ
5244 ncopies = 1;
5245 else
e8f142e2 5246 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5247
5248 gcc_assert (ncopies >= 1);
5249
894dd753 5250 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5251 {
73fbfcad 5252 if (dump_enabled_p ())
78c60e3d 5253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5254 "use not simple.\n");
ebfd146a
IR
5255 return false;
5256 }
5257
fde9c428
RG
5258 /* We can handle NOP_EXPR conversions that do not change the number
5259 of elements or the vector size. */
7b7ec6c5
RG
5260 if ((CONVERT_EXPR_CODE_P (code)
5261 || code == VIEW_CONVERT_EXPR)
fde9c428 5262 && (!vectype_in
928686b1 5263 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5264 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5265 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5266 return false;
5267
7b7b1813
RG
5268 /* We do not handle bit-precision changes. */
5269 if ((CONVERT_EXPR_CODE_P (code)
5270 || code == VIEW_CONVERT_EXPR)
5271 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5272 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5273 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5274 /* But a conversion that does not change the bit-pattern is ok. */
5275 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5276 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5277 && TYPE_UNSIGNED (TREE_TYPE (op)))
5278 /* Conversion between boolean types of different sizes is
5279 a simple assignment in case their vectypes are the same
5280 boolean vectors. */
5281 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5282 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5283 {
73fbfcad 5284 if (dump_enabled_p ())
78c60e3d
SS
5285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5286 "type conversion to/from bit-precision "
e645e942 5287 "unsupported.\n");
7b7b1813
RG
5288 return false;
5289 }
5290
ebfd146a
IR
5291 if (!vec_stmt) /* transformation not required. */
5292 {
5293 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5294 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5295 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5296 return true;
5297 }
5298
67b8dbac 5299 /* Transform. */
73fbfcad 5300 if (dump_enabled_p ())
e645e942 5301 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5302
5303 /* Handle def. */
5304 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5305
5306 /* Handle use. */
f18b55bd 5307 for (j = 0; j < ncopies; j++)
ebfd146a 5308 {
f18b55bd
IR
5309 /* Handle uses. */
5310 if (j == 0)
306b0c92 5311 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5312 else
5313 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5314
5315 /* Arguments are ready. Create the new vector stmt. */
9771b263 5316 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5317 {
7b7ec6c5
RG
5318 if (CONVERT_EXPR_CODE_P (code)
5319 || code == VIEW_CONVERT_EXPR)
4a73490d 5320 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
5321 new_stmt = gimple_build_assign (vec_dest, vop);
5322 new_temp = make_ssa_name (vec_dest, new_stmt);
5323 gimple_assign_set_lhs (new_stmt, new_temp);
5324 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5325 if (slp_node)
9771b263 5326 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 5327 }
ebfd146a
IR
5328
5329 if (slp_node)
f18b55bd
IR
5330 continue;
5331
5332 if (j == 0)
5333 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5334 else
5335 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5336
5337 prev_stmt_info = vinfo_for_stmt (new_stmt);
5338 }
b8698a0f 5339
9771b263 5340 vec_oprnds.release ();
ebfd146a
IR
5341 return true;
5342}
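
/* An illustrative example (an informal sketch, not taken from the original
   sources; names are made up): vectorizable_assignment handles plain
   copies and conversions that leave the vector layout unchanged, such as
   int <-> unsigned int of the same width, which become a single
   VIEW_CONVERT_EXPR per vector:

     void
     copy_bits (unsigned *restrict u, const int *restrict s, int n)
     {
       for (int i = 0; i < n; i++)
         u[i] = (unsigned) s[i];        // same width, same element count
     }
*/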
5343
9dc3f7de 5344
1107f3ae
IR
5345/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5346 either as shift by a scalar or by a vector. */
5347
5348bool
5349vect_supportable_shift (enum tree_code code, tree scalar_type)
5350{
5351
ef4bddc2 5352 machine_mode vec_mode;
1107f3ae
IR
5353 optab optab;
5354 int icode;
5355 tree vectype;
5356
5357 vectype = get_vectype_for_scalar_type (scalar_type);
5358 if (!vectype)
5359 return false;
5360
5361 optab = optab_for_tree_code (code, vectype, optab_scalar);
5362 if (!optab
5363 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5364 {
5365 optab = optab_for_tree_code (code, vectype, optab_vector);
5366 if (!optab
5367 || (optab_handler (optab, TYPE_MODE (vectype))
5368 == CODE_FOR_nothing))
5369 return false;
5370 }
5371
5372 vec_mode = TYPE_MODE (vectype);
5373 icode = (int) optab_handler (optab, vec_mode);
5374 if (icode == CODE_FOR_nothing)
5375 return false;
5376
5377 return true;
5378}
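
/* A usage sketch (a hypothetical caller, not taken from the original
   sources; ITYPE stands for whatever scalar type the caller is working
   with): pattern-recognition code can use this predicate before rewriting
   a statement into a shift, roughly

     if (!vect_supportable_shift (RSHIFT_EXPR, itype))
       return NULL;     // give up; the target cannot vectorize the shift
*/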
5379
5380
9dc3f7de
IR
5381/* Function vectorizable_shift.
5382
5383 Check if STMT performs a shift operation that can be vectorized.
5384 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5385 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5386 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5387
5388static bool
355fe088 5389vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5390 gimple **vec_stmt, slp_tree slp_node,
5391 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5392{
5393 tree vec_dest;
5394 tree scalar_dest;
5395 tree op0, op1 = NULL;
5396 tree vec_oprnd1 = NULL_TREE;
5397 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5398 tree vectype;
5399 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5400 enum tree_code code;
ef4bddc2 5401 machine_mode vec_mode;
9dc3f7de
IR
5402 tree new_temp;
5403 optab optab;
5404 int icode;
ef4bddc2 5405 machine_mode optab_op2_mode;
9dc3f7de 5406 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5407 int ndts = 2;
355fe088 5408 gimple *new_stmt = NULL;
9dc3f7de 5409 stmt_vec_info prev_stmt_info;
928686b1
RS
5410 poly_uint64 nunits_in;
5411 poly_uint64 nunits_out;
9dc3f7de 5412 tree vectype_out;
cede2577 5413 tree op1_vectype;
9dc3f7de
IR
5414 int ncopies;
5415 int j, i;
6e1aa848
DN
5416 vec<tree> vec_oprnds0 = vNULL;
5417 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5418 tree vop0, vop1;
5419 unsigned int k;
49eab32e 5420 bool scalar_shift_arg = true;
9dc3f7de 5421 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5422 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5423
5424 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5425 return false;
5426
66c16fd9
RB
5427 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5428 && ! vec_stmt)
9dc3f7de
IR
5429 return false;
5430
5431 /* Is STMT a vectorizable binary/unary operation? */
5432 if (!is_gimple_assign (stmt))
5433 return false;
5434
5435 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5436 return false;
5437
5438 code = gimple_assign_rhs_code (stmt);
5439
5440 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5441 || code == RROTATE_EXPR))
5442 return false;
5443
5444 scalar_dest = gimple_assign_lhs (stmt);
5445 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5446 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5447 {
73fbfcad 5448 if (dump_enabled_p ())
78c60e3d 5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5450 "bit-precision shifts not supported.\n");
7b7b1813
RG
5451 return false;
5452 }
9dc3f7de
IR
5453
5454 op0 = gimple_assign_rhs1 (stmt);
894dd753 5455 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5456 {
73fbfcad 5457 if (dump_enabled_p ())
78c60e3d 5458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5459 "use not simple.\n");
9dc3f7de
IR
5460 return false;
5461 }
5462 /* If op0 is an external or constant def use a vector type with
5463 the same size as the output vector type. */
5464 if (!vectype)
5465 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5466 if (vec_stmt)
5467 gcc_assert (vectype);
5468 if (!vectype)
5469 {
73fbfcad 5470 if (dump_enabled_p ())
78c60e3d 5471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5472 "no vectype for scalar type\n");
9dc3f7de
IR
5473 return false;
5474 }
5475
5476 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5477 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5478 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5479 return false;
5480
5481 op1 = gimple_assign_rhs2 (stmt);
894dd753 5482 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype))
9dc3f7de 5483 {
73fbfcad 5484 if (dump_enabled_p ())
78c60e3d 5485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5486 "use not simple.\n");
9dc3f7de
IR
5487 return false;
5488 }
5489
9dc3f7de
IR
5490 /* Multiple types in SLP are handled by creating the appropriate number of
5491 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5492 case of SLP. */
fce57248 5493 if (slp_node)
9dc3f7de
IR
5494 ncopies = 1;
5495 else
e8f142e2 5496 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5497
5498 gcc_assert (ncopies >= 1);
5499
5500 /* Determine whether the shift amount is a vector or a scalar. If the
5501 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5502
dbfa87aa
YR
5503 if ((dt[1] == vect_internal_def
5504 || dt[1] == vect_induction_def)
5505 && !slp_node)
49eab32e
JJ
5506 scalar_shift_arg = false;
5507 else if (dt[1] == vect_constant_def
5508 || dt[1] == vect_external_def
5509 || dt[1] == vect_internal_def)
5510 {
5511 /* In SLP, we need to check whether the shift count is the same;
5512 in loops, if it is a constant or invariant, it is always
5513 a scalar shift. */
5514 if (slp_node)
5515 {
355fe088
TS
5516 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5517 gimple *slpstmt;
49eab32e 5518
9771b263 5519 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5520 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5521 scalar_shift_arg = false;
5522 }
60d393e8
RB
5523
5524 /* If the shift amount is computed by a pattern stmt, we cannot
5525 use the scalar amount directly; thus give up and use a vector
5526 shift. */
5527 if (dt[1] == vect_internal_def)
5528 {
5529 gimple *def = SSA_NAME_DEF_STMT (op1);
5530 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5531 scalar_shift_arg = false;
5532 }
49eab32e
JJ
5533 }
5534 else
5535 {
73fbfcad 5536 if (dump_enabled_p ())
78c60e3d 5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5538 "operand mode requires invariant argument.\n");
49eab32e
JJ
5539 return false;
5540 }
5541
9dc3f7de 5542 /* Vector shifted by vector. */
49eab32e 5543 if (!scalar_shift_arg)
9dc3f7de
IR
5544 {
5545 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5546 if (dump_enabled_p ())
78c60e3d 5547 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5548 "vector/vector shift/rotate found.\n");
78c60e3d 5549
aa948027
JJ
5550 if (!op1_vectype)
5551 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5552 if (op1_vectype == NULL_TREE
5553 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5554 {
73fbfcad 5555 if (dump_enabled_p ())
78c60e3d
SS
5556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5557 "unusable type for last operand in"
e645e942 5558 " vector/vector shift/rotate.\n");
cede2577
JJ
5559 return false;
5560 }
9dc3f7de
IR
5561 }
5562 /* See if the machine has a vector shifted by scalar insn and if not
5563 then see if it has a vector shifted by vector insn. */
49eab32e 5564 else
9dc3f7de
IR
5565 {
5566 optab = optab_for_tree_code (code, vectype, optab_scalar);
5567 if (optab
5568 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5569 {
73fbfcad 5570 if (dump_enabled_p ())
78c60e3d 5571 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5572 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5573 }
5574 else
5575 {
5576 optab = optab_for_tree_code (code, vectype, optab_vector);
5577 if (optab
5578 && (optab_handler (optab, TYPE_MODE (vectype))
5579 != CODE_FOR_nothing))
5580 {
49eab32e
JJ
5581 scalar_shift_arg = false;
5582
73fbfcad 5583 if (dump_enabled_p ())
78c60e3d 5584 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5585 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5586
5587 /* Unlike the other binary operators, shifts/rotates have
5588 the rhs being int, instead of the same type as the lhs,
5589 so make sure the scalar is the right type if we are
aa948027 5590 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5591 if (dt[1] == vect_constant_def)
5592 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5593 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5594 TREE_TYPE (op1)))
5595 {
5596 if (slp_node
5597 && TYPE_MODE (TREE_TYPE (vectype))
5598 != TYPE_MODE (TREE_TYPE (op1)))
5599 {
73fbfcad 5600 if (dump_enabled_p ())
78c60e3d
SS
5601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5602 "unusable type for last operand in"
e645e942 5603 " vector/vector shift/rotate.\n");
21c0a521 5604 return false;
aa948027
JJ
5605 }
5606 if (vec_stmt && !slp_node)
5607 {
5608 op1 = fold_convert (TREE_TYPE (vectype), op1);
5609 op1 = vect_init_vector (stmt, op1,
5610 TREE_TYPE (vectype), NULL);
5611 }
5612 }
9dc3f7de
IR
5613 }
5614 }
5615 }
9dc3f7de
IR
5616
5617 /* Supportable by target? */
5618 if (!optab)
5619 {
73fbfcad 5620 if (dump_enabled_p ())
78c60e3d 5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5622 "no optab.\n");
9dc3f7de
IR
5623 return false;
5624 }
5625 vec_mode = TYPE_MODE (vectype);
5626 icode = (int) optab_handler (optab, vec_mode);
5627 if (icode == CODE_FOR_nothing)
5628 {
73fbfcad 5629 if (dump_enabled_p ())
78c60e3d 5630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5631 "op not supported by target.\n");
9dc3f7de 5632 /* Check only during analysis. */
cf098191 5633 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5634 || (!vec_stmt
5635 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5636 return false;
73fbfcad 5637 if (dump_enabled_p ())
e645e942
TJ
5638 dump_printf_loc (MSG_NOTE, vect_location,
5639 "proceeding using word mode.\n");
9dc3f7de
IR
5640 }
5641
5642 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5643 if (!vec_stmt
5644 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5645 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5646 {
73fbfcad 5647 if (dump_enabled_p ())
78c60e3d 5648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5649 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5650 return false;
5651 }
5652
5653 if (!vec_stmt) /* transformation not required. */
5654 {
5655 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5656 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5657 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5658 return true;
5659 }
5660
67b8dbac 5661 /* Transform. */
9dc3f7de 5662
73fbfcad 5663 if (dump_enabled_p ())
78c60e3d 5664 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5665 "transform binary/unary operation.\n");
9dc3f7de
IR
5666
5667 /* Handle def. */
5668 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5669
9dc3f7de
IR
5670 prev_stmt_info = NULL;
5671 for (j = 0; j < ncopies; j++)
5672 {
5673 /* Handle uses. */
5674 if (j == 0)
5675 {
5676 if (scalar_shift_arg)
5677 {
5678 /* Vector shl and shr insn patterns can be defined with scalar
5679 operand 2 (shift operand). In this case, use constant or loop
5680 invariant op1 directly, without extending it to vector mode
5681 first. */
5682 optab_op2_mode = insn_data[icode].operand[2].mode;
5683 if (!VECTOR_MODE_P (optab_op2_mode))
5684 {
73fbfcad 5685 if (dump_enabled_p ())
78c60e3d 5686 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5687 "operand 1 using scalar mode.\n");
9dc3f7de 5688 vec_oprnd1 = op1;
8930f723 5689 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5690 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5691 if (slp_node)
5692 {
5693 /* Store vec_oprnd1 for every vector stmt to be created
5694 for SLP_NODE. We check during the analysis that all
5695 the shift arguments are the same.
5696 TODO: Allow different constants for different vector
5697 stmts generated for an SLP instance. */
5698 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5699 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5700 }
5701 }
5702 }
5703
5704 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5705 (a special case for certain kinds of vector shifts); otherwise,
5706 operand 1 should be of a vector type (the usual case). */
5707 if (vec_oprnd1)
5708 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5709 slp_node);
9dc3f7de
IR
5710 else
5711 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5712 slp_node);
9dc3f7de
IR
5713 }
5714 else
5715 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5716
5717 /* Arguments are ready. Create the new vector stmt. */
9771b263 5718 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5719 {
9771b263 5720 vop1 = vec_oprnds1[i];
0d0e4a03 5721 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5722 new_temp = make_ssa_name (vec_dest, new_stmt);
5723 gimple_assign_set_lhs (new_stmt, new_temp);
5724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5725 if (slp_node)
9771b263 5726 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5727 }
5728
5729 if (slp_node)
5730 continue;
5731
5732 if (j == 0)
5733 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5734 else
5735 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5736 prev_stmt_info = vinfo_for_stmt (new_stmt);
5737 }
5738
9771b263
DN
5739 vec_oprnds0.release ();
5740 vec_oprnds1.release ();
9dc3f7de
IR
5741
5742 return true;
5743}
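
/* An illustrative example (an informal sketch, not taken from the original
   sources; names are made up): the two shapes of shift distinguished
   above.  In the first loop the shift amount is loop-invariant, so
   scalar_shift_arg stays true and a vector/scalar shift optab can be
   used; in the second it varies per element, so a vector/vector shift is
   required:

     void
     shifts (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] <<= 3;            // invariant amount: vector/scalar shift
       for (int i = 0; i < n; i++)
         a[i] <<= b[i];         // per-element amount: vector/vector shift
     }
*/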
5744
5745
ebfd146a
IR
5746/* Function vectorizable_operation.
5747
16949072
RG
5748 Check if STMT performs a binary, unary or ternary operation that can
5749 be vectorized.
b8698a0f 5750 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5751 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5752 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5753
5754static bool
355fe088 5755vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5756 gimple **vec_stmt, slp_tree slp_node,
5757 stmt_vector_for_cost *cost_vec)
ebfd146a 5758{
00f07b86 5759 tree vec_dest;
ebfd146a 5760 tree scalar_dest;
16949072 5761 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5762 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5763 tree vectype;
ebfd146a 5764 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5765 enum tree_code code, orig_code;
ef4bddc2 5766 machine_mode vec_mode;
ebfd146a
IR
5767 tree new_temp;
5768 int op_type;
00f07b86 5769 optab optab;
523ba738 5770 bool target_support_p;
16949072
RG
5771 enum vect_def_type dt[3]
5772 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5773 int ndts = 3;
355fe088 5774 gimple *new_stmt = NULL;
ebfd146a 5775 stmt_vec_info prev_stmt_info;
928686b1
RS
5776 poly_uint64 nunits_in;
5777 poly_uint64 nunits_out;
ebfd146a
IR
5778 tree vectype_out;
5779 int ncopies;
5780 int j, i;
6e1aa848
DN
5781 vec<tree> vec_oprnds0 = vNULL;
5782 vec<tree> vec_oprnds1 = vNULL;
5783 vec<tree> vec_oprnds2 = vNULL;
16949072 5784 tree vop0, vop1, vop2;
a70d6342 5785 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5786 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5787
a70d6342 5788 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5789 return false;
5790
66c16fd9
RB
5791 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5792 && ! vec_stmt)
ebfd146a
IR
5793 return false;
5794
5795 /* Is STMT a vectorizable binary/unary operation? */
5796 if (!is_gimple_assign (stmt))
5797 return false;
5798
5799 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5800 return false;
5801
0eb952ea 5802 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5803
1af4ebf5
MG
5804 /* For pointer addition and subtraction, we should use the normal
5805 plus and minus for the vector operation. */
ebfd146a
IR
5806 if (code == POINTER_PLUS_EXPR)
5807 code = PLUS_EXPR;
1af4ebf5
MG
5808 if (code == POINTER_DIFF_EXPR)
5809 code = MINUS_EXPR;
ebfd146a
IR
5810
5811 /* Support only unary or binary operations. */
5812 op_type = TREE_CODE_LENGTH (code);
16949072 5813 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5814 {
73fbfcad 5815 if (dump_enabled_p ())
78c60e3d 5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5817 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5818 op_type);
ebfd146a
IR
5819 return false;
5820 }
5821
b690cc0f
RG
5822 scalar_dest = gimple_assign_lhs (stmt);
5823 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5824
7b7b1813
RG
5825 /* Most operations cannot handle bit-precision types without extra
5826 truncations. */
045c1278 5827 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5828 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5829 /* Exception are bitwise binary operations. */
5830 && code != BIT_IOR_EXPR
5831 && code != BIT_XOR_EXPR
5832 && code != BIT_AND_EXPR)
5833 {
73fbfcad 5834 if (dump_enabled_p ())
78c60e3d 5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5836 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5837 return false;
5838 }
5839
ebfd146a 5840 op0 = gimple_assign_rhs1 (stmt);
894dd753 5841 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5842 {
73fbfcad 5843 if (dump_enabled_p ())
78c60e3d 5844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5845 "use not simple.\n");
ebfd146a
IR
5846 return false;
5847 }
b690cc0f
RG
5848 /* If op0 is an external or constant def use a vector type with
5849 the same size as the output vector type. */
5850 if (!vectype)
b036c6c5
IE
5851 {
5852 /* For a boolean type we cannot determine the vectype from an
5853 invariant value (we don't know whether it is a vector
5854 of booleans or a vector of integers). We use the output
5855 vectype because operations on booleans don't change the
5856 type. */
2568d8a1 5857 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5858 {
2568d8a1 5859 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5860 {
5861 if (dump_enabled_p ())
5862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5863 "not supported operation on bool value.\n");
5864 return false;
5865 }
5866 vectype = vectype_out;
5867 }
5868 else
5869 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5870 }
7d8930a0
IR
5871 if (vec_stmt)
5872 gcc_assert (vectype);
5873 if (!vectype)
5874 {
73fbfcad 5875 if (dump_enabled_p ())
7d8930a0 5876 {
78c60e3d
SS
5877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5878 "no vectype for scalar type ");
5879 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5880 TREE_TYPE (op0));
e645e942 5881 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5882 }
5883
5884 return false;
5885 }
b690cc0f
RG
5886
5887 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5888 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5889 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5890 return false;
ebfd146a 5891
16949072 5892 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5893 {
5894 op1 = gimple_assign_rhs2 (stmt);
894dd753 5895 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5896 {
73fbfcad 5897 if (dump_enabled_p ())
78c60e3d 5898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5899 "use not simple.\n");
ebfd146a
IR
5900 return false;
5901 }
5902 }
16949072
RG
5903 if (op_type == ternary_op)
5904 {
5905 op2 = gimple_assign_rhs3 (stmt);
894dd753 5906 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5907 {
73fbfcad 5908 if (dump_enabled_p ())
78c60e3d 5909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5910 "use not simple.\n");
16949072
RG
5911 return false;
5912 }
5913 }
ebfd146a 5914
b690cc0f 5915 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5916 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5917 case of SLP. */
fce57248 5918 if (slp_node)
b690cc0f
RG
5919 ncopies = 1;
5920 else
e8f142e2 5921 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5922
5923 gcc_assert (ncopies >= 1);
5924
9dc3f7de 5925 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5926 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5927 || code == RROTATE_EXPR)
9dc3f7de 5928 return false;
ebfd146a 5929
ebfd146a 5930 /* Supportable by target? */
00f07b86
RH
5931
5932 vec_mode = TYPE_MODE (vectype);
5933 if (code == MULT_HIGHPART_EXPR)
523ba738 5934 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5935 else
5936 {
5937 optab = optab_for_tree_code (code, vectype, optab_default);
5938 if (!optab)
5deb57cb 5939 {
73fbfcad 5940 if (dump_enabled_p ())
78c60e3d 5941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5942 "no optab.\n");
00f07b86 5943 return false;
5deb57cb 5944 }
523ba738
RS
5945 target_support_p = (optab_handler (optab, vec_mode)
5946 != CODE_FOR_nothing);
5deb57cb
JJ
5947 }
5948
523ba738 5949 if (!target_support_p)
ebfd146a 5950 {
73fbfcad 5951 if (dump_enabled_p ())
78c60e3d 5952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5953 "op not supported by target.\n");
ebfd146a 5954 /* Check only during analysis. */
cf098191 5955 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5956 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5957 return false;
73fbfcad 5958 if (dump_enabled_p ())
e645e942
TJ
5959 dump_printf_loc (MSG_NOTE, vect_location,
5960 "proceeding using word mode.\n");
383d9c83
IR
5961 }
5962
4a00c761 5963 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5964 if (!VECTOR_MODE_P (vec_mode)
5965 && !vec_stmt
ca09abcb 5966 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5967 {
73fbfcad 5968 if (dump_enabled_p ())
78c60e3d 5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5970 "not worthwhile without SIMD support.\n");
e34842c6 5971 return false;
7d8930a0 5972 }
ebfd146a 5973
ebfd146a
IR
5974 if (!vec_stmt) /* transformation not required. */
5975 {
4a00c761 5976 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5977 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5978 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5979 return true;
5980 }
5981
67b8dbac 5982 /* Transform. */
ebfd146a 5983
73fbfcad 5984 if (dump_enabled_p ())
78c60e3d 5985 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5986 "transform binary/unary operation.\n");
383d9c83 5987
0eb952ea
JJ
5988 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5989 vectors with unsigned elements, but the result is signed. So, we
5990 need to compute the MINUS_EXPR into vectype temporary and
5991 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5992 tree vec_cvt_dest = NULL_TREE;
5993 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
5994 {
5995 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5996 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5997 }
5998 /* Handle def. */
5999 else
6000 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 6001
ebfd146a
IR
6002 /* In case the vectorization factor (VF) is bigger than the number
6003 of elements that we can fit in a vectype (nunits), we have to generate
6004 more than one vector stmt - i.e. - we need to "unroll" the
4a00c761
JJ
6005 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6006 from one copy of the vector stmt to the next, in the field
6007 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6008 stages to find the correct vector defs to be used when vectorizing
6009 stmts that use the defs of the current stmt. The example below
6010 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6011 we need to create 4 vectorized stmts):
6012
6013 before vectorization:
6014 RELATED_STMT VEC_STMT
6015 S1: x = memref - -
6016 S2: z = x + 1 - -
6017
6018 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6019 there):
6020 RELATED_STMT VEC_STMT
6021 VS1_0: vx0 = memref0 VS1_1 -
6022 VS1_1: vx1 = memref1 VS1_2 -
6023 VS1_2: vx2 = memref2 VS1_3 -
6024 VS1_3: vx3 = memref3 - -
6025 S1: x = load - VS1_0
6026 S2: z = x + 1 - -
6027
6028 step2: vectorize stmt S2 (done here):
6029 To vectorize stmt S2 we first need to find the relevant vector
6030 def for the first operand 'x'. This is, as usual, obtained from
6031 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6032 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6033 relevant vector def 'vx0'. Having found 'vx0' we can generate
6034 the vector stmt VS2_0, and as usual, record it in the
6035 STMT_VINFO_VEC_STMT of stmt S2.
6036 When creating the second copy (VS2_1), we obtain the relevant vector
6037 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6038 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6039 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6040 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6041 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6042 chain of stmts and pointers:
6043 RELATED_STMT VEC_STMT
6044 VS1_0: vx0 = memref0 VS1_1 -
6045 VS1_1: vx1 = memref1 VS1_2 -
6046 VS1_2: vx2 = memref2 VS1_3 -
6047 VS1_3: vx3 = memref3 - -
6048 S1: x = load - VS1_0
6049 VS2_0: vz0 = vx0 + v1 VS2_1 -
6050 VS2_1: vz1 = vx1 + v1 VS2_2 -
6051 VS2_2: vz2 = vx2 + v1 VS2_3 -
6052 VS2_3: vz3 = vx3 + v1 - -
6053 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6054
6055 prev_stmt_info = NULL;
6056 for (j = 0; j < ncopies; j++)
6057 {
6058 /* Handle uses. */
6059 if (j == 0)
4a00c761 6060 {
d6476f90 6061 if (op_type == binary_op)
4a00c761 6062 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6063 slp_node);
d6476f90
RB
6064 else if (op_type == ternary_op)
6065 {
6066 if (slp_node)
6067 {
6068 auto_vec<tree> ops(3);
6069 ops.quick_push (op0);
6070 ops.quick_push (op1);
6071 ops.quick_push (op2);
6072 auto_vec<vec<tree> > vec_defs(3);
6073 vect_get_slp_defs (ops, slp_node, &vec_defs);
6074 vec_oprnds0 = vec_defs[0];
6075 vec_oprnds1 = vec_defs[1];
6076 vec_oprnds2 = vec_defs[2];
6077 }
6078 else
6079 {
6080 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6081 NULL);
6082 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6083 NULL);
6084 }
6085 }
4a00c761
JJ
6086 else
6087 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6088 slp_node);
4a00c761 6089 }
ebfd146a 6090 else
4a00c761
JJ
6091 {
6092 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6093 if (op_type == ternary_op)
6094 {
9771b263
DN
6095 tree vec_oprnd = vec_oprnds2.pop ();
6096 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6097 vec_oprnd));
4a00c761
JJ
6098 }
6099 }
6100
6101 /* Arguments are ready. Create the new vector stmt. */
9771b263 6102 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6103 {
4a00c761 6104 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6105 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6106 vop2 = ((op_type == ternary_op)
9771b263 6107 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 6108 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
6109 new_temp = make_ssa_name (vec_dest, new_stmt);
6110 gimple_assign_set_lhs (new_stmt, new_temp);
6111 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6112 if (vec_cvt_dest)
6113 {
6114 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6115 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6116 new_temp);
6117 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6118 gimple_assign_set_lhs (new_stmt, new_temp);
6119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6120 }
4a00c761 6121 if (slp_node)
9771b263 6122 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
6123 }
6124
4a00c761
JJ
6125 if (slp_node)
6126 continue;
6127
6128 if (j == 0)
6129 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6130 else
6131 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6132 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
6133 }
6134
9771b263
DN
6135 vec_oprnds0.release ();
6136 vec_oprnds1.release ();
6137 vec_oprnds2.release ();
ebfd146a 6138
ebfd146a
IR
6139 return true;
6140}
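
/* An illustrative example (an informal sketch, not taken from the original
   sources; names are made up): a loop whose subtraction is represented as
   a POINTER_DIFF_EXPR.  As described above, the subtraction itself is done
   on vectors of unsigned elements and the result is then
   VIEW_CONVERT_EXPRed to the signed vectype_out:

     void
     diffs (long *restrict d, char *const *restrict p,
            char *const *restrict q, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = p[i] - q[i];    // pointer difference per element
     }
*/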
6141
f702e7d4 6142/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
6143
6144static void
f702e7d4 6145ensure_base_align (struct data_reference *dr)
c716e67f 6146{
ca823c85 6147 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6148 return;
6149
52639a61 6150 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6151 {
52639a61 6152 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6153
f702e7d4
RS
6154 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6155
428f0c67 6156 if (decl_in_symtab_p (base_decl))
f702e7d4 6157 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6158 else
6159 {
f702e7d4 6160 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6161 DECL_USER_ALIGN (base_decl) = 1;
6162 }
52639a61 6163 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6164 }
6165}
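
/* An illustrative note (not taken from the original sources): when the
   base object of a data reference is a decl whose alignment the compiler
   is allowed to increase, e.g. a file-scope array such as

     static float data[1024];

   the code above raises its DECL_ALIGN (or its symtab alignment) to
   DR_TARGET_ALIGNMENT so that the generated vector accesses can be
   aligned.  */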
6166
ebfd146a 6167
44fc7854
BE
6168/* Function get_group_alias_ptr_type.
6169
6170 Return the alias type for the group starting at FIRST_STMT. */
6171
6172static tree
6173get_group_alias_ptr_type (gimple *first_stmt)
6174{
6175 struct data_reference *first_dr, *next_dr;
6176 gimple *next_stmt;
6177
6178 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6179 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
44fc7854
BE
6180 while (next_stmt)
6181 {
6182 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6183 if (get_alias_set (DR_REF (first_dr))
6184 != get_alias_set (DR_REF (next_dr)))
6185 {
6186 if (dump_enabled_p ())
6187 dump_printf_loc (MSG_NOTE, vect_location,
6188 "conflicting alias set types.\n");
6189 return ptr_type_node;
6190 }
2c53b149 6191 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
44fc7854
BE
6192 }
6193 return reference_alias_ptr_type (DR_REF (first_dr));
6194}
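
/* An illustrative example (an informal sketch, not taken from the original
   sources; names are made up): a grouped store whose members access
   differently typed fields, and therefore have different alias sets,

     struct pair { float f; int i; };

     void
     fill (struct pair *restrict p, int n)
     {
       for (int j = 0; j < n; j++)
         {
           p[j].f = 1.0f;       // alias set of float
           p[j].i = 2;          // alias set of int
         }
     }

   makes the helper above return ptr_type_node instead of the alias
   pointer type of the first reference.  */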
6195
6196
ebfd146a
IR
6197/* Function vectorizable_store.
6198
b8698a0f
L
6199 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6200 can be vectorized.
6201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6204
6205static bool
355fe088 6206vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 6207 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 6208{
ebfd146a
IR
6209 tree data_ref;
6210 tree op;
6211 tree vec_oprnd = NULL_TREE;
6212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6213 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6214 tree elem_type;
ebfd146a 6215 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6216 struct loop *loop = NULL;
ef4bddc2 6217 machine_mode vec_mode;
ebfd146a
IR
6218 tree dummy;
6219 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6220 enum vect_def_type rhs_dt = vect_unknown_def_type;
6221 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6222 stmt_vec_info prev_stmt_info = NULL;
6223 tree dataref_ptr = NULL_TREE;
74bf76ed 6224 tree dataref_offset = NULL_TREE;
355fe088 6225 gimple *ptr_incr = NULL;
ebfd146a
IR
6226 int ncopies;
6227 int j;
2de001ee
RS
6228 gimple *next_stmt, *first_stmt;
6229 bool grouped_store;
ebfd146a 6230 unsigned int group_size, i;
6e1aa848
DN
6231 vec<tree> oprnds = vNULL;
6232 vec<tree> result_chain = vNULL;
ebfd146a 6233 bool inv_p;
09dfa495 6234 tree offset = NULL_TREE;
6e1aa848 6235 vec<tree> vec_oprnds = vNULL;
ebfd146a 6236 bool slp = (slp_node != NULL);
ebfd146a 6237 unsigned int vec_num;
a70d6342 6238 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6239 vec_info *vinfo = stmt_info->vinfo;
272c6793 6240 tree aggr_type;
134c85ca 6241 gather_scatter_info gs_info;
355fe088 6242 gimple *new_stmt;
d9f21f6a 6243 poly_uint64 vf;
2de001ee 6244 vec_load_store_type vls_type;
44fc7854 6245 tree ref_type;
a70d6342 6246
a70d6342 6247 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6248 return false;
6249
66c16fd9
RB
6250 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6251 && ! vec_stmt)
ebfd146a
IR
6252 return false;
6253
6254 /* Is vectorizable store? */
6255
c3a8f964
RS
6256 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6257 if (is_gimple_assign (stmt))
6258 {
6259 tree scalar_dest = gimple_assign_lhs (stmt);
6260 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6261 && is_pattern_stmt_p (stmt_info))
6262 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6263 if (TREE_CODE (scalar_dest) != ARRAY_REF
6264 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6265 && TREE_CODE (scalar_dest) != INDIRECT_REF
6266 && TREE_CODE (scalar_dest) != COMPONENT_REF
6267 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6268 && TREE_CODE (scalar_dest) != REALPART_EXPR
6269 && TREE_CODE (scalar_dest) != MEM_REF)
6270 return false;
6271 }
6272 else
6273 {
6274 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6275 if (!call || !gimple_call_internal_p (call))
6276 return false;
6277
6278 internal_fn ifn = gimple_call_internal_fn (call);
6279 if (!internal_store_fn_p (ifn))
c3a8f964 6280 return false;
ebfd146a 6281
c3a8f964
RS
6282 if (slp_node != NULL)
6283 {
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6286 "SLP of masked stores not supported.\n");
6287 return false;
6288 }
6289
f307441a
RS
6290 int mask_index = internal_fn_mask_index (ifn);
6291 if (mask_index >= 0)
6292 {
6293 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6294 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6295 &mask_vectype))
f307441a
RS
6296 return false;
6297 }
c3a8f964
RS
6298 }
6299
6300 op = vect_get_store_rhs (stmt);
ebfd146a 6301
fce57248
RS
6302 /* Cannot have hybrid store SLP -- that would mean storing to the
6303 same location twice. */
6304 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6305
f4d09712 6306 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6307 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6308
6309 if (loop_vinfo)
b17dc4d4
RB
6310 {
6311 loop = LOOP_VINFO_LOOP (loop_vinfo);
6312 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6313 }
6314 else
6315 vf = 1;
465c8c19
JJ
6316
6317 /* Multiple types in SLP are handled by creating the appropriate number of
6318 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6319 case of SLP. */
fce57248 6320 if (slp)
465c8c19
JJ
6321 ncopies = 1;
6322 else
e8f142e2 6323 ncopies = vect_get_num_copies (loop_vinfo, vectype);
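  /* For example (illustrative numbers only): with a V4SI vectype and a
     vectorization factor of 8, vect_get_num_copies returns 8 / 4 == 2,
     i.e. two vector stmts are generated per scalar stmt.  */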
465c8c19
JJ
6324
6325 gcc_assert (ncopies >= 1);
6326
6327 /* FORNOW. This restriction should be relaxed. */
6328 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6329 {
6330 if (dump_enabled_p ())
6331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6332 "multiple types in nested loop.\n");
6333 return false;
6334 }
6335
929b4411 6336 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6337 return false;
6338
272c6793 6339 elem_type = TREE_TYPE (vectype);
ebfd146a 6340 vec_mode = TYPE_MODE (vectype);
7b7b1813 6341
ebfd146a
IR
6342 if (!STMT_VINFO_DATA_REF (stmt_info))
6343 return false;
6344
2de001ee 6345 vect_memory_access_type memory_access_type;
7e11fc7f 6346 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6347 &memory_access_type, &gs_info))
6348 return false;
3bab6342 6349
c3a8f964
RS
6350 if (mask)
6351 {
7e11fc7f
RS
6352 if (memory_access_type == VMAT_CONTIGUOUS)
6353 {
6354 if (!VECTOR_MODE_P (vec_mode)
6355 || !can_vec_mask_load_store_p (vec_mode,
6356 TYPE_MODE (mask_vectype), false))
6357 return false;
6358 }
f307441a
RS
6359 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6360 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6361 {
6362 if (dump_enabled_p ())
6363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6364 "unsupported access type for masked store.\n");
6365 return false;
6366 }
c3a8f964
RS
6367 }
6368 else
6369 {
6370 /* FORNOW. In some cases we can vectorize even if the data type is not
6371 supported (e.g. array initialization with 0). */
6372 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6373 return false;
6374 }
6375
f307441a 6376 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6377 && memory_access_type != VMAT_GATHER_SCATTER
6378 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6379 if (grouped_store)
6380 {
2c53b149 6381 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7cfb4d93 6382 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6383 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7cfb4d93
RS
6384 }
6385 else
6386 {
6387 first_stmt = stmt;
6388 first_dr = dr;
6389 group_size = vec_num = 1;
6390 }
6391
ebfd146a
IR
6392 if (!vec_stmt) /* transformation not required. */
6393 {
2de001ee 6394 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6395
6396 if (loop_vinfo
6397 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6398 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6399 memory_access_type, &gs_info);
7cfb4d93 6400
ebfd146a 6401 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6402 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6403 vls_type, slp_node, cost_vec);
ebfd146a
IR
6404 return true;
6405 }
2de001ee 6406 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6407
67b8dbac 6408 /* Transform. */
ebfd146a 6409
f702e7d4 6410 ensure_base_align (dr);
c716e67f 6411
f307441a 6412 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6413 {
c3a8f964 6414 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6415 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6416 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6417 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6418 edge pe = loop_preheader_edge (loop);
6419 gimple_seq seq;
6420 basic_block new_bb;
6421 enum { NARROW, NONE, WIDEN } modifier;
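      /* NONE: the offset vector has the same number of elements as the
	 data vector.  WIDEN: it has twice as many; NARROW: half as many,
	 in which case NCOPIES is doubled below.  */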
4d694b27
RS
6422 poly_uint64 scatter_off_nunits
6423 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6424
4d694b27 6425 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6426 modifier = NONE;
4d694b27 6427 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6428 {
3bab6342
AT
6429 modifier = WIDEN;
6430
4d694b27
RS
6431 /* Currently gathers and scatters are only supported for
6432 fixed-length vectors. */
6433 unsigned int count = scatter_off_nunits.to_constant ();
6434 vec_perm_builder sel (count, count, 1);
6435 for (i = 0; i < (unsigned int) count; ++i)
6436 sel.quick_push (i | (count / 2));
3bab6342 6437
4d694b27 6438 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6439 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6440 indices);
3bab6342
AT
6441 gcc_assert (perm_mask != NULL_TREE);
6442 }
4d694b27 6443 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6444 {
3bab6342
AT
6445 modifier = NARROW;
6446
4d694b27
RS
6447 /* Currently gathers and scatters are only supported for
6448 fixed-length vectors. */
6449 unsigned int count = nunits.to_constant ();
6450 vec_perm_builder sel (count, count, 1);
6451 for (i = 0; i < (unsigned int) count; ++i)
6452 sel.quick_push (i | (count / 2));
3bab6342 6453
4d694b27 6454 vec_perm_indices indices (sel, 2, count);
e3342de4 6455 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6456 gcc_assert (perm_mask != NULL_TREE);
6457 ncopies *= 2;
6458 }
6459 else
6460 gcc_unreachable ();
6461
134c85ca 6462 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6463 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6464 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6465 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6466 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6467 scaletype = TREE_VALUE (arglist);
6468
6469 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6470 && TREE_CODE (rettype) == VOID_TYPE);
6471
134c85ca 6472 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6473 if (!is_gimple_min_invariant (ptr))
6474 {
6475 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6476 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6477 gcc_assert (!new_bb);
6478 }
6479
6480 /* Currently we support only unconditional scatter stores,
6481 so mask should be all ones. */
6482 mask = build_int_cst (masktype, -1);
6483 mask = vect_init_vector (stmt, mask, masktype, NULL);
6484
134c85ca 6485 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6486
6487 prev_stmt_info = NULL;
6488 for (j = 0; j < ncopies; ++j)
6489 {
6490 if (j == 0)
6491 {
6492 src = vec_oprnd1
c3a8f964 6493 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6494 op = vec_oprnd0
134c85ca 6495 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6496 }
6497 else if (modifier != NONE && (j & 1))
6498 {
6499 if (modifier == WIDEN)
6500 {
6501 src = vec_oprnd1
929b4411 6502 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6503 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6504 stmt, gsi);
6505 }
6506 else if (modifier == NARROW)
6507 {
6508 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6509 stmt, gsi);
6510 op = vec_oprnd0
134c85ca
RS
6511 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6512 vec_oprnd0);
3bab6342
AT
6513 }
6514 else
6515 gcc_unreachable ();
6516 }
6517 else
6518 {
6519 src = vec_oprnd1
929b4411 6520 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6521 op = vec_oprnd0
134c85ca
RS
6522 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6523 vec_oprnd0);
3bab6342
AT
6524 }
6525
6526 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6527 {
928686b1
RS
6528 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6529 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6530 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6531 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6532 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6534 src = var;
6535 }
6536
6537 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6538 {
928686b1
RS
6539 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6540 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6541 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6542 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6543 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6545 op = var;
6546 }
6547
6548 new_stmt
134c85ca 6549 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6550
6551 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6552
6553 if (prev_stmt_info == NULL)
6554 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6555 else
6556 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6557 prev_stmt_info = vinfo_for_stmt (new_stmt);
6558 }
6559 return true;
6560 }
6561
f307441a 6562 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6563 {
2c53b149
RB
6564 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6565 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
f307441a 6566 }
ebfd146a 6567
f307441a
RS
6568 if (grouped_store)
6569 {
ebfd146a 6570 /* FORNOW */
a70d6342 6571 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6572
6573 /* We vectorize all the stmts of the interleaving group when we
6574 reach the last stmt in the group. */
2c53b149
RB
6575 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6576 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6577 && !slp)
6578 {
6579 *vec_stmt = NULL;
6580 return true;
6581 }
6582
6583 if (slp)
4b5caab7 6584 {
0d0293ac 6585 grouped_store = false;
4b5caab7
IR
6586 /* VEC_NUM is the number of vect stmts to be created for this
6587 group. */
6588 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6589 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2c53b149 6590 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6591 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6592 op = vect_get_store_rhs (first_stmt);
4b5caab7 6593 }
ebfd146a 6594 else
4b5caab7
IR
6595 /* VEC_NUM is the number of vect stmts to be created for this
6596 group. */
ebfd146a 6597 vec_num = group_size;
44fc7854
BE
6598
6599 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6600 }
b8698a0f 6601 else
7cfb4d93 6602 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6603
73fbfcad 6604 if (dump_enabled_p ())
78c60e3d 6605 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6606 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6607
2de001ee
RS
6608 if (memory_access_type == VMAT_ELEMENTWISE
6609 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6610 {
6611 gimple_stmt_iterator incr_gsi;
6612 bool insert_after;
355fe088 6613 gimple *incr;
f2e2a985
MM
6614 tree offvar;
6615 tree ivstep;
6616 tree running_off;
f2e2a985
MM
6617 tree stride_base, stride_step, alias_off;
6618 tree vec_oprnd;
f502d50e 6619 unsigned int g;
4d694b27
RS
6620 /* Checked by get_load_store_type. */
6621 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6622
7cfb4d93 6623 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6624 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6625
6626 stride_base
6627 = fold_build_pointer_plus
b210f45f 6628 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6629 size_binop (PLUS_EXPR,
b210f45f 6630 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6631 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6632 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6633
6634 /* For a store with loop-invariant (but other than power-of-2)
6635 stride (i.e. not a grouped access) like so:
6636
6637 for (i = 0; i < n; i += stride)
6638 array[i] = ...;
6639
6640 we generate a new induction variable and new stores from
6641 the components of the (vectorized) rhs:
6642
6643 for (j = 0; ; j += VF*stride)
6644 vectemp = ...;
6645 tmp1 = vectemp[0];
6646 array[j] = tmp1;
6647 tmp2 = vectemp[1];
6648 array[j + stride] = tmp2;
6649 ...
6650 */
6651
4d694b27 6652 unsigned nstores = const_nunits;
b17dc4d4 6653 unsigned lnel = 1;
cee62fee 6654 tree ltype = elem_type;
04199738 6655 tree lvectype = vectype;
cee62fee
MM
6656 if (slp)
6657 {
4d694b27
RS
6658 if (group_size < const_nunits
6659 && const_nunits % group_size == 0)
b17dc4d4 6660 {
4d694b27 6661 nstores = const_nunits / group_size;
b17dc4d4
RB
6662 lnel = group_size;
6663 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6664 lvectype = vectype;
6665
6666 /* First check if vec_extract optab doesn't support extraction
6667 of vector elts directly. */
b397965c 6668 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6669 machine_mode vmode;
6670 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6671 || !VECTOR_MODE_P (vmode)
414fef4e 6672 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6673 || (convert_optab_handler (vec_extract_optab,
6674 TYPE_MODE (vectype), vmode)
6675 == CODE_FOR_nothing))
6676 {
6677 /* Try to avoid emitting an extract of vector elements
6678 by performing the extracts using an integer type of the
6679 same size, extracting from a vector of those and then
6680 re-interpreting it as the original vector type if
6681 supported. */
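		  /* For example (illustrative numbers, subject to target
		     support): storing groups of two SFmode elements out of
		     a V4SF vector can instead extract two DImode elements
		     from the vector viewed as V2DI, each carrying a pair
		     of floats.  */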
6682 unsigned lsize
6683 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6684 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6685 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6686 /* If we can't construct such a vector fall back to
6687 element extracts from the original vector type and
6688 element size stores. */
4d694b27 6689 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6690 && VECTOR_MODE_P (vmode)
414fef4e 6691 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6692 && (convert_optab_handler (vec_extract_optab,
6693 vmode, elmode)
6694 != CODE_FOR_nothing))
6695 {
4d694b27 6696 nstores = lnunits;
04199738
RB
6697 lnel = group_size;
6698 ltype = build_nonstandard_integer_type (lsize, 1);
6699 lvectype = build_vector_type (ltype, nstores);
6700 }
6701 /* Else fall back to vector extraction anyway.
6702 Fewer stores are more important than avoiding spilling
6703 of the vector we extract from. Compared to the
6704 construction case in vectorizable_load no store-forwarding
6705 issue exists here for reasonable archs. */
6706 }
b17dc4d4 6707 }
4d694b27
RS
6708 else if (group_size >= const_nunits
6709 && group_size % const_nunits == 0)
b17dc4d4
RB
6710 {
6711 nstores = 1;
4d694b27 6712 lnel = const_nunits;
b17dc4d4 6713 ltype = vectype;
04199738 6714 lvectype = vectype;
b17dc4d4 6715 }
cee62fee
MM
6716 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6717 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6718 }
6719
f2e2a985
MM
6720 ivstep = stride_step;
6721 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6722 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6723
6724 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6725
b210f45f
RB
6726 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6727 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6728 create_iv (stride_base, ivstep, NULL,
6729 loop, &incr_gsi, insert_after,
6730 &offvar, NULL);
6731 incr = gsi_stmt (incr_gsi);
4fbeb363 6732 loop_vinfo->add_stmt (incr);
f2e2a985 6733
b210f45f 6734 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6735
6736 prev_stmt_info = NULL;
44fc7854 6737 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6738 next_stmt = first_stmt;
6739 for (g = 0; g < group_size; g++)
f2e2a985 6740 {
f502d50e
MM
6741 running_off = offvar;
6742 if (g)
f2e2a985 6743 {
f502d50e
MM
6744 tree size = TYPE_SIZE_UNIT (ltype);
6745 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6746 size);
f502d50e 6747 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6748 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6749 running_off, pos);
f2e2a985 6750 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6751 running_off = newoff;
f502d50e 6752 }
b17dc4d4
RB
6753 unsigned int group_el = 0;
6754 unsigned HOST_WIDE_INT
6755 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6756 for (j = 0; j < ncopies; j++)
6757 {
c3a8f964 6758 /* We've set op and rhs_dt above, from vect_get_store_rhs,
f502d50e
MM
6759 and first_stmt == stmt. */
6760 if (j == 0)
6761 {
6762 if (slp)
6763 {
6764 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6765 slp_node);
f502d50e
MM
6766 vec_oprnd = vec_oprnds[0];
6767 }
6768 else
6769 {
c3a8f964 6770 op = vect_get_store_rhs (next_stmt);
81c40241 6771 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6772 }
6773 }
f2e2a985 6774 else
f502d50e
MM
6775 {
6776 if (slp)
6777 vec_oprnd = vec_oprnds[j];
6778 else
c079cbac 6779 {
894dd753 6780 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411
RS
6781 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6782 vec_oprnd);
c079cbac 6783 }
f502d50e 6784 }
04199738
RB
6785 /* Pun the vector to extract from if necessary. */
6786 if (lvectype != vectype)
6787 {
6788 tree tem = make_ssa_name (lvectype);
6789 gimple *pun
6790 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6791 lvectype, vec_oprnd));
6792 vect_finish_stmt_generation (stmt, pun, gsi);
6793 vec_oprnd = tem;
6794 }
f502d50e
MM
6795 for (i = 0; i < nstores; i++)
6796 {
6797 tree newref, newoff;
355fe088 6798 gimple *incr, *assign;
f502d50e
MM
6799 tree size = TYPE_SIZE (ltype);
6800 /* Extract the i'th component. */
6801 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6802 bitsize_int (i), size);
6803 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6804 size, pos);
6805
6806 elem = force_gimple_operand_gsi (gsi, elem, true,
6807 NULL_TREE, true,
6808 GSI_SAME_STMT);
6809
b17dc4d4
RB
6810 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6811 group_el * elsz);
f502d50e 6812 newref = build2 (MEM_REF, ltype,
b17dc4d4 6813 running_off, this_off);
19986382 6814 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6815
6816 /* And store it to *running_off. */
6817 assign = gimple_build_assign (newref, elem);
6818 vect_finish_stmt_generation (stmt, assign, gsi);
6819
b17dc4d4
RB
6820 group_el += lnel;
6821 if (! slp
6822 || group_el == group_size)
6823 {
6824 newoff = copy_ssa_name (running_off, NULL);
6825 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6826 running_off, stride_step);
6827 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6828
b17dc4d4
RB
6829 running_off = newoff;
6830 group_el = 0;
6831 }
225ce44b
RB
6832 if (g == group_size - 1
6833 && !slp)
f502d50e
MM
6834 {
6835 if (j == 0 && i == 0)
225ce44b
RB
6836 STMT_VINFO_VEC_STMT (stmt_info)
6837 = *vec_stmt = assign;
f502d50e
MM
6838 else
6839 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6840 prev_stmt_info = vinfo_for_stmt (assign);
6841 }
6842 }
f2e2a985 6843 }
2c53b149 6844 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6845 if (slp)
6846 break;
f2e2a985 6847 }
778dd3b6
RB
6848
6849 vec_oprnds.release ();
f2e2a985
MM
6850 return true;
6851 }
6852
8c681247 6853 auto_vec<tree> dr_chain (group_size);
9771b263 6854 oprnds.create (group_size);
ebfd146a 6855
720f5239 6856 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6857 gcc_assert (alignment_support_scheme);
70088b95
RS
6858 vec_loop_masks *loop_masks
6859 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6860 ? &LOOP_VINFO_MASKS (loop_vinfo)
6861 : NULL);
272c6793 6862 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6863 realignment. vect_supportable_dr_alignment always returns either
6864 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6865 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6866 && !mask
70088b95 6867 && !loop_masks)
272c6793
RS
6868 || alignment_support_scheme == dr_aligned
6869 || alignment_support_scheme == dr_unaligned_supported);
6870
62da9e14
RS
6871 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6872 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6873 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6874
f307441a
RS
6875 tree bump;
6876 tree vec_offset = NULL_TREE;
6877 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6878 {
6879 aggr_type = NULL_TREE;
6880 bump = NULL_TREE;
6881 }
6882 else if (memory_access_type == VMAT_GATHER_SCATTER)
6883 {
6884 aggr_type = elem_type;
6885 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6886 &bump, &vec_offset);
6887 }
272c6793 6888 else
f307441a
RS
6889 {
6890 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6891 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6892 else
6893 aggr_type = vectype;
6894 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6895 }
ebfd146a 6896
c3a8f964
RS
6897 if (mask)
6898 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6899
ebfd146a
IR
6900 /* In case the vectorization factor (VF) is bigger than the number
6901 of elements that we can fit in a vectype (nunits), we have to generate
6902 more than one vector stmt - i.e., we need to "unroll" the
b8698a0f 6903 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6904 vect_get_vec_def_for_copy_stmt. */
6905
0d0293ac 6906 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6907
6908 S1: &base + 2 = x2
6909 S2: &base = x0
6910 S3: &base + 1 = x1
6911 S4: &base + 3 = x3
6912
6913 We create vectorized stores starting from base address (the access of the
6914 first stmt in the chain (S2 in the above example)), when the last store stmt
6915 of the chain (S4) is reached:
6916
6917 VS1: &base = vx2
6918 VS2: &base + vec_size*1 = vx0
6919 VS3: &base + vec_size*2 = vx1
6920 VS4: &base + vec_size*3 = vx3
6921
6922 Then permutation statements are generated:
6923
3fcc1b55
JJ
6924 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6925 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6926 ...
b8698a0f 6927
ebfd146a
IR
6928 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6929 (the order of the data-refs in the output of vect_permute_store_chain
6930 corresponds to the order of scalar stmts in the interleaving chain - see
6931 the documentation of vect_permute_store_chain()).
6932
6933 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6934 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6935 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6936 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6937 */
6938
6939 prev_stmt_info = NULL;
c3a8f964 6940 tree vec_mask = NULL_TREE;
ebfd146a
IR
6941 for (j = 0; j < ncopies; j++)
6942 {
ebfd146a
IR
6943
6944 if (j == 0)
6945 {
6946 if (slp)
6947 {
6948 /* Get vectorized arguments for SLP_NODE. */
d092494c 6949 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6950 NULL, slp_node);
ebfd146a 6951
9771b263 6952 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6953 }
6954 else
6955 {
b8698a0f
L
6956 /* For interleaved stores we collect vectorized defs for all the
6957 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6958 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6959 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6960
2c53b149 6961 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6962 OPRNDS are of size 1. */
b8698a0f 6963 next_stmt = first_stmt;
ebfd146a
IR
6964 for (i = 0; i < group_size; i++)
6965 {
b8698a0f 6966 /* Since gaps are not supported for interleaved stores,
2c53b149 6967 DR_GROUP_SIZE is the exact number of stmts in the chain.
b8698a0f 6968 Therefore, NEXT_STMT can't be NULL_TREE. In case that
2c53b149 6969 there is no interleaving, DR_GROUP_SIZE is 1, and only one
ebfd146a 6970 iteration of the loop will be executed. */
c3a8f964 6971 op = vect_get_store_rhs (next_stmt);
81c40241 6972 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6973 dr_chain.quick_push (vec_oprnd);
6974 oprnds.quick_push (vec_oprnd);
2c53b149 6975 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6976 }
c3a8f964
RS
6977 if (mask)
6978 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6979 mask_vectype);
ebfd146a
IR
6980 }
6981
6982 /* We should have caught mismatched types earlier. */
6983 gcc_assert (useless_type_conversion_p (vectype,
6984 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6985 bool simd_lane_access_p
6986 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6987 if (simd_lane_access_p
6988 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6989 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6990 && integer_zerop (DR_OFFSET (first_dr))
6991 && integer_zerop (DR_INIT (first_dr))
6992 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6993 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6994 {
6995 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6996 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6997 inv_p = false;
74bf76ed 6998 }
f307441a
RS
6999 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7000 {
7001 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
7002 &dataref_ptr, &vec_offset);
7003 inv_p = false;
7004 }
74bf76ed
JJ
7005 else
7006 dataref_ptr
7007 = vect_create_data_ref_ptr (first_stmt, aggr_type,
7008 simd_lane_access_p ? loop : NULL,
09dfa495 7009 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
7010 simd_lane_access_p, &inv_p,
7011 NULL_TREE, bump);
a70d6342 7012 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 7013 }
b8698a0f 7014 else
ebfd146a 7015 {
b8698a0f
L
7016 /* For interleaved stores we created vectorized defs for all the
7017 defs stored in OPRNDS in the previous iteration (previous copy).
7018 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
7019 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7020 next copy.
2c53b149 7021 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
7022 OPRNDS are of size 1. */
7023 for (i = 0; i < group_size; i++)
7024 {
9771b263 7025 op = oprnds[i];
894dd753 7026 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411 7027 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
7028 dr_chain[i] = vec_oprnd;
7029 oprnds[i] = vec_oprnd;
ebfd146a 7030 }
c3a8f964 7031 if (mask)
929b4411 7032 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
7033 if (dataref_offset)
7034 dataref_offset
f307441a
RS
7035 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7036 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
7037 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7038 vec_offset);
74bf76ed
JJ
7039 else
7040 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7041 bump);
ebfd146a
IR
7042 }
7043
2de001ee 7044 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7045 {
272c6793 7046 tree vec_array;
267d3070 7047
3ba4ff41 7048 /* Get an array into which we can store the individual vectors. */
272c6793 7049 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7050
7051 /* Invalidate the current contents of VEC_ARRAY. This should
7052 become an RTL clobber too, which prevents the vector registers
7053 from being upward-exposed. */
7054 vect_clobber_variable (stmt, gsi, vec_array);
7055
7056 /* Store the individual vectors into the array. */
272c6793 7057 for (i = 0; i < vec_num; i++)
c2d7ab2a 7058 {
9771b263 7059 vec_oprnd = dr_chain[i];
272c6793 7060 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7061 }
b8698a0f 7062
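	  /* FINAL_MASK combines the loop mask of a fully-masked loop (if
	     any) with the mask supplied on the statement itself (if any).  */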
7cfb4d93 7063 tree final_mask = NULL;
70088b95
RS
7064 if (loop_masks)
7065 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7066 vectype, j);
7cfb4d93
RS
7067 if (vec_mask)
7068 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7069 vec_mask, gsi);
7070
7e11fc7f 7071 gcall *call;
7cfb4d93 7072 if (final_mask)
7e11fc7f
RS
7073 {
7074 /* Emit:
7075 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7076 VEC_ARRAY). */
7077 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7078 tree alias_ptr = build_int_cst (ref_type, align);
7079 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7080 dataref_ptr, alias_ptr,
7cfb4d93 7081 final_mask, vec_array);
7e11fc7f
RS
7082 }
7083 else
7084 {
7085 /* Emit:
7086 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7087 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7088 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7089 vec_array);
7090 gimple_call_set_lhs (call, data_ref);
7091 }
a844293d
RS
7092 gimple_call_set_nothrow (call, true);
7093 new_stmt = call;
267d3070 7094 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41
RS
7095
7096 /* Record that VEC_ARRAY is now dead. */
7097 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7098 }
7099 else
7100 {
7101 new_stmt = NULL;
0d0293ac 7102 if (grouped_store)
272c6793 7103 {
b6b9227d
JJ
7104 if (j == 0)
7105 result_chain.create (group_size);
272c6793
RS
7106 /* Permute. */
7107 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7108 &result_chain);
7109 }
c2d7ab2a 7110
272c6793
RS
7111 next_stmt = first_stmt;
7112 for (i = 0; i < vec_num; i++)
7113 {
644ffefd 7114 unsigned align, misalign;
272c6793 7115
7cfb4d93 7116 tree final_mask = NULL_TREE;
70088b95
RS
7117 if (loop_masks)
7118 final_mask = vect_get_loop_mask (gsi, loop_masks,
7119 vec_num * ncopies,
7cfb4d93
RS
7120 vectype, vec_num * j + i);
7121 if (vec_mask)
7122 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7123 vec_mask, gsi);
7124
f307441a
RS
7125 if (memory_access_type == VMAT_GATHER_SCATTER)
7126 {
7127 tree scale = size_int (gs_info.scale);
7128 gcall *call;
70088b95 7129 if (loop_masks)
f307441a
RS
7130 call = gimple_build_call_internal
7131 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7132 scale, vec_oprnd, final_mask);
7133 else
7134 call = gimple_build_call_internal
7135 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7136 scale, vec_oprnd);
7137 gimple_call_set_nothrow (call, true);
7138 new_stmt = call;
7139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7140 break;
7141 }
7142
272c6793
RS
7143 if (i > 0)
7144 /* Bump the vector pointer. */
7145 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7146 stmt, bump);
272c6793
RS
7147
7148 if (slp)
9771b263 7149 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7150 else if (grouped_store)
7151 /* For grouped stores vectorized defs are interleaved in
272c6793 7152 vect_permute_store_chain(). */
9771b263 7153 vec_oprnd = result_chain[i];
272c6793 7154
f702e7d4 7155 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7156 if (aligned_access_p (first_dr))
644ffefd 7157 misalign = 0;
272c6793
RS
7158 else if (DR_MISALIGNMENT (first_dr) == -1)
7159 {
25f68d90 7160 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7161 misalign = 0;
272c6793
RS
7162 }
7163 else
c3a8f964 7164 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7165 if (dataref_offset == NULL_TREE
7166 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7167 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7168 misalign);
c2d7ab2a 7169
62da9e14 7170 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7171 {
7172 tree perm_mask = perm_mask_for_reverse (vectype);
7173 tree perm_dest
c3a8f964 7174 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7175 vectype);
b731b390 7176 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7177
7178 /* Generate the permute statement. */
355fe088 7179 gimple *perm_stmt
0d0e4a03
JJ
7180 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7181 vec_oprnd, perm_mask);
09dfa495
BM
7182 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7183
7184 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7185 vec_oprnd = new_temp;
7186 }
7187
272c6793 7188 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7189 if (final_mask)
c3a8f964
RS
7190 {
7191 align = least_bit_hwi (misalign | align);
7192 tree ptr = build_int_cst (ref_type, align);
7193 gcall *call
7194 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7195 dataref_ptr, ptr,
7cfb4d93 7196 final_mask, vec_oprnd);
c3a8f964
RS
7197 gimple_call_set_nothrow (call, true);
7198 new_stmt = call;
7199 }
7200 else
7201 {
7202 data_ref = fold_build2 (MEM_REF, vectype,
7203 dataref_ptr,
7204 dataref_offset
7205 ? dataref_offset
7206 : build_int_cst (ref_type, 0));
7207 if (aligned_access_p (first_dr))
7208 ;
7209 else if (DR_MISALIGNMENT (first_dr) == -1)
7210 TREE_TYPE (data_ref)
7211 = build_aligned_type (TREE_TYPE (data_ref),
7212 align * BITS_PER_UNIT);
7213 else
7214 TREE_TYPE (data_ref)
7215 = build_aligned_type (TREE_TYPE (data_ref),
7216 TYPE_ALIGN (elem_type));
19986382 7217 vect_copy_ref_info (data_ref, DR_REF (first_dr));
c3a8f964
RS
7218 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7219 }
272c6793 7220 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
7221
7222 if (slp)
7223 continue;
7224
2c53b149 7225 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
7226 if (!next_stmt)
7227 break;
7228 }
ebfd146a 7229 }
1da0876c
RS
7230 if (!slp)
7231 {
7232 if (j == 0)
7233 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7234 else
7235 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7236 prev_stmt_info = vinfo_for_stmt (new_stmt);
7237 }
ebfd146a
IR
7238 }
7239
9771b263
DN
7240 oprnds.release ();
7241 result_chain.release ();
7242 vec_oprnds.release ();
ebfd146a
IR
7243
7244 return true;
7245}
7246
557be5a8
AL
7247/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7248 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7249 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7250 vect_gen_perm_mask_checked. */
a1e53f3f 7251
3fcc1b55 7252tree
4aae3cb3 7253vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7254{
b00cb3bf 7255 tree mask_type;
a1e53f3f 7256
0ecc2b7d
RS
7257 poly_uint64 nunits = sel.length ();
7258 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7259
7260 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7261 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7262}
7263
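/* A minimal usage sketch (illustrative only, assuming a fixed-length
   VECTYPE): building an element-reversal mask with these helpers,
   following the same vec_perm_builder pattern used in vectorizable_store
   above:

     unsigned int count = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
     vec_perm_builder sel (count, count, 1);
     for (unsigned int i = 0; i < count; ++i)
       sel.quick_push (count - 1 - i);
     vec_perm_indices indices (sel, 1, count);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   vect_gen_perm_mask_checked asserts that the target supports the
   permutation; callers that are not sure should test
   can_vec_perm_const_p first, as noted above.  */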
7ac7e286 7264/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7265 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7266
7267tree
4aae3cb3 7268vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7269{
7ac7e286 7270 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7271 return vect_gen_perm_mask_any (vectype, sel);
7272}
7273
aec7ae7d
JJ
7274/* Given vector variables X and Y that were generated for the scalar
7275 STMT, generate instructions to permute the vector elements of X and Y
7276 using permutation mask MASK_VEC, insert them at *GSI and return the
7277 permuted vector variable. */
a1e53f3f
L
7278
7279static tree
355fe088 7280permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7281 gimple_stmt_iterator *gsi)
a1e53f3f
L
7282{
7283 tree vectype = TREE_TYPE (x);
aec7ae7d 7284 tree perm_dest, data_ref;
355fe088 7285 gimple *perm_stmt;
a1e53f3f 7286
7ad429a4
RS
7287 tree scalar_dest = gimple_get_lhs (stmt);
7288 if (TREE_CODE (scalar_dest) == SSA_NAME)
7289 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7290 else
7291 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7292 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7293
7294 /* Generate the permute statement. */
0d0e4a03 7295 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7296 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7297
7298 return data_ref;
7299}
7300
6b916b36
RB
7301/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7302 inserting them on the loop's preheader edge. Returns true if we
7303 were successful in doing so (and thus STMT can then be moved),
7304 otherwise returns false. */
7305
7306static bool
355fe088 7307hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7308{
7309 ssa_op_iter i;
7310 tree op;
7311 bool any = false;
7312
7313 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7314 {
355fe088 7315 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7316 if (!gimple_nop_p (def_stmt)
7317 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7318 {
7319 /* Make sure we don't need to recurse. While we could do
7320 so in simple cases, when there are more complex use webs
7321 we don't have an easy way to preserve stmt order to fulfil
7322 dependencies within them. */
7323 tree op2;
7324 ssa_op_iter i2;
d1417442
JJ
7325 if (gimple_code (def_stmt) == GIMPLE_PHI)
7326 return false;
6b916b36
RB
7327 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7328 {
355fe088 7329 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7330 if (!gimple_nop_p (def_stmt2)
7331 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7332 return false;
7333 }
7334 any = true;
7335 }
7336 }
7337
7338 if (!any)
7339 return true;
7340
7341 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7342 {
355fe088 7343 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7344 if (!gimple_nop_p (def_stmt)
7345 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7346 {
7347 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7348 gsi_remove (&gsi, false);
7349 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7350 }
7351 }
7352
7353 return true;
7354}
7355
ebfd146a
IR
7356/* vectorizable_load.
7357
b8698a0f
L
7358 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7359 can be vectorized.
7360 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
7361 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7362 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7363
7364static bool
355fe088 7365vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2
RB
7366 slp_tree slp_node, slp_instance slp_node_instance,
7367 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7368{
7369 tree scalar_dest;
7370 tree vec_dest = NULL;
7371 tree data_ref = NULL;
7372 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7373 stmt_vec_info prev_stmt_info;
ebfd146a 7374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7375 struct loop *loop = NULL;
ebfd146a 7376 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7377 bool nested_in_vect_loop = false;
c716e67f 7378 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7379 tree elem_type;
ebfd146a 7380 tree new_temp;
ef4bddc2 7381 machine_mode mode;
355fe088 7382 gimple *new_stmt = NULL;
ebfd146a
IR
7383 tree dummy;
7384 enum dr_alignment_support alignment_support_scheme;
7385 tree dataref_ptr = NULL_TREE;
74bf76ed 7386 tree dataref_offset = NULL_TREE;
355fe088 7387 gimple *ptr_incr = NULL;
ebfd146a 7388 int ncopies;
4d694b27
RS
7389 int i, j;
7390 unsigned int group_size;
7391 poly_uint64 group_gap_adj;
ebfd146a
IR
7392 tree msq = NULL_TREE, lsq;
7393 tree offset = NULL_TREE;
356bbc4c 7394 tree byte_offset = NULL_TREE;
ebfd146a 7395 tree realignment_token = NULL_TREE;
538dd0b7 7396 gphi *phi = NULL;
6e1aa848 7397 vec<tree> dr_chain = vNULL;
0d0293ac 7398 bool grouped_load = false;
355fe088 7399 gimple *first_stmt;
4f0a0218 7400 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
7401 bool inv_p;
7402 bool compute_in_loop = false;
7403 struct loop *at_loop;
7404 int vec_num;
7405 bool slp = (slp_node != NULL);
7406 bool slp_perm = false;
a70d6342 7407 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7408 poly_uint64 vf;
272c6793 7409 tree aggr_type;
134c85ca 7410 gather_scatter_info gs_info;
310213d4 7411 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7412 tree ref_type;
929b4411 7413 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7414
465c8c19
JJ
7415 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7416 return false;
7417
66c16fd9
RB
7418 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7419 && ! vec_stmt)
465c8c19
JJ
7420 return false;
7421
c3a8f964
RS
7422 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7423 if (is_gimple_assign (stmt))
7424 {
7425 scalar_dest = gimple_assign_lhs (stmt);
7426 if (TREE_CODE (scalar_dest) != SSA_NAME)
7427 return false;
465c8c19 7428
c3a8f964
RS
7429 tree_code code = gimple_assign_rhs_code (stmt);
7430 if (code != ARRAY_REF
7431 && code != BIT_FIELD_REF
7432 && code != INDIRECT_REF
7433 && code != COMPONENT_REF
7434 && code != IMAGPART_EXPR
7435 && code != REALPART_EXPR
7436 && code != MEM_REF
7437 && TREE_CODE_CLASS (code) != tcc_declaration)
7438 return false;
7439 }
7440 else
7441 {
7442 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7443 if (!call || !gimple_call_internal_p (call))
7444 return false;
7445
7446 internal_fn ifn = gimple_call_internal_fn (call);
7447 if (!internal_load_fn_p (ifn))
c3a8f964 7448 return false;
465c8c19 7449
c3a8f964
RS
7450 scalar_dest = gimple_call_lhs (call);
7451 if (!scalar_dest)
7452 return false;
7453
7454 if (slp_node != NULL)
7455 {
7456 if (dump_enabled_p ())
7457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7458 "SLP of masked loads not supported.\n");
7459 return false;
7460 }
7461
bfaa08b7
RS
7462 int mask_index = internal_fn_mask_index (ifn);
7463 if (mask_index >= 0)
7464 {
7465 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7466 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7467 &mask_vectype))
bfaa08b7
RS
7468 return false;
7469 }
c3a8f964 7470 }
465c8c19
JJ
7471
7472 if (!STMT_VINFO_DATA_REF (stmt_info))
7473 return false;
7474
7475 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7476 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7477
a70d6342
IR
7478 if (loop_vinfo)
7479 {
7480 loop = LOOP_VINFO_LOOP (loop_vinfo);
7481 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7482 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7483 }
7484 else
3533e503 7485 vf = 1;
ebfd146a
IR
7486
7487 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7488 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7489 case of SLP. */
fce57248 7490 if (slp)
ebfd146a
IR
7491 ncopies = 1;
7492 else
e8f142e2 7493 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7494
7495 gcc_assert (ncopies >= 1);
7496
7497 /* FORNOW. This restriction should be relaxed. */
7498 if (nested_in_vect_loop && ncopies > 1)
7499 {
73fbfcad 7500 if (dump_enabled_p ())
78c60e3d 7501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7502 "multiple types in nested loop.\n");
ebfd146a
IR
7503 return false;
7504 }
7505
f2556b68
RB
7506 /* Invalidate assumptions made by dependence analysis when vectorization
7507 on the unrolled body effectively re-orders stmts. */
7508 if (ncopies > 1
7509 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7510 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7511 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7512 {
7513 if (dump_enabled_p ())
7514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7515 "cannot perform implicit CSE when unrolling "
7516 "with negative dependence distance\n");
7517 return false;
7518 }
7519
7b7b1813 7520 elem_type = TREE_TYPE (vectype);
947131ba 7521 mode = TYPE_MODE (vectype);
ebfd146a
IR
7522
7523 /* FORNOW. In some cases we can vectorize even if the data type is not
7524 supported (e.g. data copies). */
947131ba 7525 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7526 {
73fbfcad 7527 if (dump_enabled_p ())
78c60e3d 7528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7529 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7530 return false;
7531 }
7532
ebfd146a 7533 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7534 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7535 {
0d0293ac 7536 grouped_load = true;
ebfd146a 7537 /* FORNOW */
2de001ee
RS
7538 gcc_assert (!nested_in_vect_loop);
7539 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7540
2c53b149
RB
7541 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7542 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7543
b1af7da6
RB
7544 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7545 slp_perm = true;
7546
f2556b68
RB
7547 /* Invalidate assumptions made by dependence analysis when vectorization
7548 on the unrolled body effectively re-orders stmts. */
7549 if (!PURE_SLP_STMT (stmt_info)
7550 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7551 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7552 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7553 {
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7556 "cannot perform implicit CSE when performing "
7557 "group loads with negative dependence distance\n");
7558 return false;
7559 }
96bb56b2
RB
7560
7561 /* Similarly when the stmt is a load that is both part of a SLP
7562 instance and a loop vectorized stmt via the same-dr mechanism
7563 we have to give up. */
2c53b149 7564 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2
RB
7565 && (STMT_SLP_TYPE (stmt_info)
7566 != STMT_SLP_TYPE (vinfo_for_stmt
2c53b149 7567 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
96bb56b2
RB
7568 {
7569 if (dump_enabled_p ())
7570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7571 "conflicting SLP types for CSEd load\n");
7572 return false;
7573 }
ebfd146a 7574 }
7cfb4d93
RS
7575 else
7576 group_size = 1;
ebfd146a 7577
2de001ee 7578 vect_memory_access_type memory_access_type;
7e11fc7f 7579 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7580 &memory_access_type, &gs_info))
7581 return false;
a1e53f3f 7582
c3a8f964
RS
7583 if (mask)
7584 {
7585 if (memory_access_type == VMAT_CONTIGUOUS)
7586 {
7e11fc7f
RS
7587 machine_mode vec_mode = TYPE_MODE (vectype);
7588 if (!VECTOR_MODE_P (vec_mode)
7589 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7590 TYPE_MODE (mask_vectype), true))
7591 return false;
7592 }
bfaa08b7 7593 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7594 {
7595 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7596 tree masktype
7597 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7598 if (TREE_CODE (masktype) == INTEGER_TYPE)
7599 {
7600 if (dump_enabled_p ())
7601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7602 "masked gather with integer mask not"
7603 " supported.");
7604 return false;
7605 }
7606 }
bfaa08b7
RS
7607 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7608 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7609 {
7610 if (dump_enabled_p ())
7611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7612 "unsupported access type for masked load.\n");
7613 return false;
7614 }
7615 }
7616
ebfd146a
IR
7617 if (!vec_stmt) /* transformation not required. */
7618 {
2de001ee
RS
7619 if (!slp)
7620 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7621
7622 if (loop_vinfo
7623 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7624 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7625 memory_access_type, &gs_info);
7cfb4d93 7626
ebfd146a 7627 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7628 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7629 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7630 return true;
7631 }
7632
2de001ee
RS
7633 if (!slp)
7634 gcc_assert (memory_access_type
7635 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7636
73fbfcad 7637 if (dump_enabled_p ())
78c60e3d 7638 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7639 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7640
67b8dbac 7641 /* Transform. */
ebfd146a 7642
f702e7d4 7643 ensure_base_align (dr);
c716e67f 7644
bfaa08b7 7645 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7646 {
929b4411
RS
7647 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7648 mask_dt);
aec7ae7d
JJ
7649 return true;
7650 }
2de001ee
RS
7651
7652 if (memory_access_type == VMAT_ELEMENTWISE
7653 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7654 {
7655 gimple_stmt_iterator incr_gsi;
7656 bool insert_after;
355fe088 7657 gimple *incr;
7d75abc8 7658 tree offvar;
7d75abc8
MM
7659 tree ivstep;
7660 tree running_off;
9771b263 7661 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7662 tree stride_base, stride_step, alias_off;
4d694b27
RS
7663 /* Checked by get_load_store_type. */
7664 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7665 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7666
7cfb4d93 7667 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7668 gcc_assert (!nested_in_vect_loop);
7d75abc8 7669
b210f45f 7670 if (grouped_load)
44fc7854 7671 {
2c53b149 7672 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7673 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7674 }
ab313a8c 7675 else
44fc7854
BE
7676 {
7677 first_stmt = stmt;
7678 first_dr = dr;
b210f45f
RB
7679 }
7680 if (slp && grouped_load)
7681 {
2c53b149 7682 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
b210f45f
RB
7683 ref_type = get_group_alias_ptr_type (first_stmt);
7684 }
7685 else
7686 {
7687 if (grouped_load)
7688 cst_offset
7689 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7690 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7691 group_size = 1;
b210f45f 7692 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7693 }
ab313a8c 7694
14ac6aa2
RB
7695 stride_base
7696 = fold_build_pointer_plus
ab313a8c 7697 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7698 size_binop (PLUS_EXPR,
ab313a8c
RB
7699 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7700 convert_to_ptrofftype (DR_INIT (first_dr))));
7701 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7702
7703 /* For a load with loop-invariant (but other than power-of-2)
7704 stride (i.e. not a grouped access) like so:
7705
7706 for (i = 0; i < n; i += stride)
7707 ... = array[i];
7708
7709 we generate a new induction variable and new accesses to
7710 form a new vector (or vectors, depending on ncopies):
7711
7712 for (j = 0; ; j += VF*stride)
7713 tmp1 = array[j];
7714 tmp2 = array[j + stride];
7715 ...
7716 vectemp = {tmp1, tmp2, ...}
7717 */
7718
ab313a8c
RB
7719 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7720 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7721
7722 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7723
b210f45f
RB
7724 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7725 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7726 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7727 loop, &incr_gsi, insert_after,
7728 &offvar, NULL);
7729 incr = gsi_stmt (incr_gsi);
4fbeb363 7730 loop_vinfo->add_stmt (incr);
7d75abc8 7731
b210f45f 7732 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7733
7734 prev_stmt_info = NULL;
7735 running_off = offvar;
44fc7854 7736 alias_off = build_int_cst (ref_type, 0);
4d694b27 7737 int nloads = const_nunits;
e09b4c37 7738 int lnel = 1;
7b5fc413 7739 tree ltype = TREE_TYPE (vectype);
ea60dd34 7740 tree lvectype = vectype;
b266b968 7741 auto_vec<tree> dr_chain;
2de001ee 7742 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7743 {
4d694b27 7744 if (group_size < const_nunits)
e09b4c37 7745 {
ff03930a
JJ
7746 /* First check if vec_init optab supports construction from
7747 vector elts directly. */
b397965c 7748 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7749 machine_mode vmode;
7750 if (mode_for_vector (elmode, group_size).exists (&vmode)
7751 && VECTOR_MODE_P (vmode)
414fef4e 7752 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7753 && (convert_optab_handler (vec_init_optab,
7754 TYPE_MODE (vectype), vmode)
7755 != CODE_FOR_nothing))
ea60dd34 7756 {
4d694b27 7757 nloads = const_nunits / group_size;
ea60dd34 7758 lnel = group_size;
ff03930a
JJ
7759 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7760 }
7761 else
7762 {
7763 /* Otherwise avoid emitting a constructor of vector elements
7764 by performing the loads using an integer type of the same
7765 size, constructing a vector of those and then
7766 re-interpreting it as the original vector type.
7767 This avoids a huge runtime penalty due to the general
7768 inability to perform store forwarding from smaller stores
7769 to a larger load. */
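		  /* For example (illustrative numbers, subject to target
		     support): loading groups of two SFmode elements into a
		     V4SF vector can be done with two DImode loads, forming
		     a V2DI constructor that is then view-converted to
		     V4SF.  */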
7770 unsigned lsize
7771 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7772 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7773 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7774 /* If we can't construct such a vector fall back to
7775 element loads of the original vector type. */
4d694b27 7776 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7777 && VECTOR_MODE_P (vmode)
414fef4e 7778 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7779 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7780 != CODE_FOR_nothing))
7781 {
4d694b27 7782 nloads = lnunits;
ff03930a
JJ
7783 lnel = group_size;
7784 ltype = build_nonstandard_integer_type (lsize, 1);
7785 lvectype = build_vector_type (ltype, nloads);
7786 }
ea60dd34 7787 }
e09b4c37 7788 }
2de001ee 7789 else
e09b4c37 7790 {
ea60dd34 7791 nloads = 1;
4d694b27 7792 lnel = const_nunits;
e09b4c37 7793 ltype = vectype;
e09b4c37 7794 }
2de001ee
RS
7795 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7796 }
bb4e4747
BC
7797 /* Load the vector(1) scalar_type directly if the vectype has just one element. */
7798 else if (nloads == 1)
7799 ltype = vectype;
7800
2de001ee
RS
7801 if (slp)
7802 {
66c16fd9
RB
7803 /* For SLP permutation support we need to load the whole group,
7804 not only the number of vector stmts the permutation result
7805 fits in. */
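	  /* For example, a group of 3 loads with VF 4 and 4-element vectors
	     needs CEIL (3 * 4, 4) == 3 vector loads below to cover all
	     group members across the unrolled iterations.  */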
b266b968 7806 if (slp_perm)
66c16fd9 7807 {
d9f21f6a
RS
7808 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7809 variable VF. */
7810 unsigned int const_vf = vf.to_constant ();
4d694b27 7811 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7812 dr_chain.create (ncopies);
7813 }
7814 else
7815 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7816 }
4d694b27 7817 unsigned int group_el = 0;
e09b4c37
RB
7818 unsigned HOST_WIDE_INT
7819 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7820 for (j = 0; j < ncopies; j++)
7821 {
7b5fc413 7822 if (nloads > 1)
e09b4c37
RB
7823 vec_alloc (v, nloads);
7824 for (i = 0; i < nloads; i++)
7b5fc413 7825 {
e09b4c37 7826 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7827 group_el * elsz + cst_offset);
19986382
RB
7828 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7829 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7830 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7831 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7832 if (nloads > 1)
7833 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7834 gimple_assign_lhs (new_stmt));
7835
7836 group_el += lnel;
7837 if (! slp
7838 || group_el == group_size)
7b5fc413 7839 {
e09b4c37
RB
7840 tree newoff = copy_ssa_name (running_off);
7841 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7842 running_off, stride_step);
7b5fc413
RB
7843 vect_finish_stmt_generation (stmt, incr, gsi);
7844
7845 running_off = newoff;
e09b4c37 7846 group_el = 0;
7b5fc413 7847 }
7b5fc413 7848 }
e09b4c37 7849 if (nloads > 1)
7d75abc8 7850 {
ea60dd34
RB
7851 tree vec_inv = build_constructor (lvectype, v);
7852 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7853 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7854 if (lvectype != vectype)
7855 {
7856 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7857 VIEW_CONVERT_EXPR,
7858 build1 (VIEW_CONVERT_EXPR,
7859 vectype, new_temp));
7860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7861 }
7d75abc8
MM
7862 }
7863
7b5fc413 7864 if (slp)
b266b968 7865 {
b266b968
RB
7866 if (slp_perm)
7867 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7868 else
7869 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7870 }
7d75abc8 7871 else
225ce44b
RB
7872 {
7873 if (j == 0)
7874 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7875 else
7876 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7877 prev_stmt_info = vinfo_for_stmt (new_stmt);
7878 }
7d75abc8 7879 }
b266b968 7880 if (slp_perm)
29afecdf
RB
7881 {
7882 unsigned n_perms;
7883 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7884 slp_node_instance, false, &n_perms);
7885 }
7d75abc8
MM
7886 return true;
7887 }
aec7ae7d 7888
b5ec4de7
RS
7889 if (memory_access_type == VMAT_GATHER_SCATTER
7890 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7891 grouped_load = false;
7892
0d0293ac 7893 if (grouped_load)
ebfd146a 7894 {
2c53b149
RB
7895 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7896 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7897 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7898 without permutation. */
7899 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7900 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7901 /* For BB vectorization always use the first stmt to base
7902 the data ref pointer on. */
7903 if (bb_vinfo)
7904 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7905
ebfd146a 7906 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7907 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7908 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7909 ??? But we can only do so if there is exactly one
7910 as we have no way to get at the rest. Leave the CSE
7911 opportunity alone.
7912 ??? With the group load eventually participating
7913 in multiple different permutations (having multiple
7914 slp nodes which refer to the same group) the CSE
7915 is even wrong code. See PR56270. */
7916 && !slp)
ebfd146a
IR
7917 {
7918 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7919 return true;
7920 }
7921 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7922 group_gap_adj = 0;
ebfd146a
IR
7923
7924 /* VEC_NUM is the number of vect stmts to be created for this group. */
7925 if (slp)
7926 {
0d0293ac 7927 grouped_load = false;
91ff1504
RB
7928 /* For SLP permutation support we need to load the whole group,
7929 not only the number of vector stmts the permutation result
7930 fits in. */
7931 if (slp_perm)
b267968e 7932 {
d9f21f6a
RS
7933 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7934 variable VF. */
7935 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7936 unsigned int const_nunits = nunits.to_constant ();
7937 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7938 group_gap_adj = vf * group_size - nunits * vec_num;
7939 }
91ff1504 7940 else
b267968e
RB
7941 {
7942 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7943 group_gap_adj
7944 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7945 }
a70d6342 7946 }
ebfd146a 7947 else
9b999e8c 7948 vec_num = group_size;
44fc7854
BE
7949
7950 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7951 }
7952 else
7953 {
7954 first_stmt = stmt;
7955 first_dr = dr;
7956 group_size = vec_num = 1;
9b999e8c 7957 group_gap_adj = 0;
44fc7854 7958 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7959 }
7960
720f5239 7961 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7962 gcc_assert (alignment_support_scheme);
70088b95
RS
7963 vec_loop_masks *loop_masks
7964 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7965 ? &LOOP_VINFO_MASKS (loop_vinfo)
7966 : NULL);
7cfb4d93
RS
7967 /* Targets with store-lane instructions must not require explicit
7968 realignment. vect_supportable_dr_alignment always returns either
7969 dr_aligned or dr_unaligned_supported for masked operations. */
7970 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7971 && !mask
70088b95 7972 && !loop_masks)
272c6793
RS
7973 || alignment_support_scheme == dr_aligned
7974 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7975
7976 /* In case the vectorization factor (VF) is bigger than the number
7977 of elements that we can fit in a vectype (nunits), we have to generate
7978 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7979 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7980 from one copy of the vector stmt to the next, in the field
ff802fa1 7981 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7982 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7983 stmts that use the defs of the current stmt. The example below
7984 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7985 need to create 4 vectorized stmts):
ebfd146a
IR
7986
7987 before vectorization:
7988 RELATED_STMT VEC_STMT
7989 S1: x = memref - -
7990 S2: z = x + 1 - -
7991
7992 step 1: vectorize stmt S1:
7993 We first create the vector stmt VS1_0, and, as usual, record a
7994 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7995 Next, we create the vector stmt VS1_1, and record a pointer to
7996 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7997 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7998 stmts and pointers:
7999 RELATED_STMT VEC_STMT
8000 VS1_0: vx0 = memref0 VS1_1 -
8001 VS1_1: vx1 = memref1 VS1_2 -
8002 VS1_2: vx2 = memref2 VS1_3 -
8003 VS1_3: vx3 = memref3 - -
8004 S1: x = load - VS1_0
8005 S2: z = x + 1 - -
8006
b8698a0f L 8007 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8008 information we recorded in the RELATED_STMT field is used to vectorize
ebfd146a IR 8009 stmt S2. */
8010
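/* Illustrative sketch, not taken from GCC: the unrolling described above,
   written as source code for VF = 16 and 4-element vectors (so
   ncopies = 16 / 4 = 4).  Each k step stands for one copy VS1_k of S1 and
   the matching copy of S2; the names are hypothetical and the code assumes
   suitably aligned data.  */
typedef int v4si __attribute__ ((vector_size (16)));

static void
unroll_by_vf_sketch (int *restrict z, const int *restrict x, int n)
{
  for (int i = 0; i + 16 <= n; i += 16)      /* one vectorized iteration, VF = 16 */
    for (int k = 0; k < 4; k++)              /* the four copies VS1_0 .. VS1_3    */
      {
        v4si vx = *(const v4si *) (x + i + 4 * k);                /* VS1_k: vxk = memrefk */
        *(v4si *) (z + i + 4 * k) = vx + (v4si) { 1, 1, 1, 1 };   /* copy of S2: z = x + 1 */
      }
}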
0d0293ac 8011 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
8012
8013 S1: x2 = &base + 2
8014 S2: x0 = &base
8015 S3: x1 = &base + 1
8016 S4: x3 = &base + 3
8017
b8698a0f 8018 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
8019 starting from the access of the first stmt of the chain:
8020
8021 VS1: vx0 = &base
8022 VS2: vx1 = &base + vec_size*1
8023 VS3: vx3 = &base + vec_size*2
8024 VS4: vx4 = &base + vec_size*3
8025
8026 Then permutation statements are generated:
8027
e2c83630 RH 8028 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8029 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
8030 ...
8031
8032 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8033 (the order of the data-refs in the output of vect_permute_load_chain
8034 corresponds to the order of scalar stmts in the interleaving chain - see
8035 the documentation of vect_permute_load_chain()).
8036 The generation of permutation stmts and recording them in
0d0293ac 8037 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8038
b8698a0f 8039 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8040 permutation stmts above are created for every copy. The result vector
8041 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8042 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
8043
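/* Illustrative sketch, not taken from GCC: the even/odd extraction that the
   VS5/VS6 permutations above perform, written with GCC's generic vector
   built-ins for group_size 2 and 4-element vectors.  The function name is
   hypothetical.  */
typedef int v4si __attribute__ ((vector_size (16)));

static void
deinterleave_sketch (v4si vx0, v4si vx1, v4si *even, v4si *odd)
{
  const v4si sel_even = { 0, 2, 4, 6 };             /* { 0, 2, ..., i*2 }   */
  const v4si sel_odd  = { 1, 3, 5, 7 };             /* { 1, 3, ..., i*2+1 } */
  *even = __builtin_shuffle (vx0, vx1, sel_even);   /* VS5 */
  *odd  = __builtin_shuffle (vx0, vx1, sel_odd);    /* VS6 */
}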
8044 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8045 on a target that supports unaligned accesses (dr_unaligned_supported)
8046 we generate the following code:
8047 p = initial_addr;
8048 indx = 0;
8049 loop {
8050 p = p + indx * vectype_size;
8051 vec_dest = *(p);
8052 indx = indx + 1;
8053 }
8054
8055 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8056 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8057 then generate the following code, in which the data in each iteration is
8058 obtained by two vector loads, one from the previous iteration, and one
8059 from the current iteration:
8060 p1 = initial_addr;
8061 msq_init = *(floor(p1))
8062 p2 = initial_addr + VS - 1;
8063 realignment_token = call target_builtin;
8064 indx = 0;
8065 loop {
8066 p2 = p2 + indx * vectype_size
8067 lsq = *(floor(p2))
8068 vec_dest = realign_load (msq, lsq, realignment_token)
8069 indx = indx + 1;
8070 msq = lsq;
8071 } */
8072
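/* Illustrative sketch, not taken from GCC: the floor(p) used in the scheme
   above is simply the address with its low bits cleared, which the
   dr_explicit_realign* cases later emit as a BIT_AND_EXPR with -align.
   The helper name is hypothetical; ALIGN must be a power of two.  */
#include <stdint.h>

static inline const void *
floor_to_alignment_sketch (const void *p, uintptr_t align)
{
  return (const void *) ((uintptr_t) p & ~(align - 1));
}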
8073 /* If the misalignment remains the same throughout the execution of the
8074 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8075 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8076 This can only occur when vectorizing memory accesses in the inner-loop
8077 nested within an outer-loop that is being vectorized. */
8078
d1e4b493 8079 if (nested_in_vect_loop
cf098191
RS
8080 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8081 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8082 {
8083 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8084 compute_in_loop = true;
8085 }
8086
8087 if ((alignment_support_scheme == dr_explicit_realign_optimized
8088 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8089 && !compute_in_loop)
ebfd146a
IR
8090 {
8091 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8092 alignment_support_scheme, NULL_TREE,
8093 &at_loop);
8094 if (alignment_support_scheme == dr_explicit_realign_optimized)
8095 {
538dd0b7 8096 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8097 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8098 size_one_node);
ebfd146a
IR
8099 }
8100 }
8101 else
8102 at_loop = loop;
8103
62da9e14 8104 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8105 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8106
ab2fc782
RS
8107 tree bump;
8108 tree vec_offset = NULL_TREE;
8109 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8110 {
8111 aggr_type = NULL_TREE;
8112 bump = NULL_TREE;
8113 }
8114 else if (memory_access_type == VMAT_GATHER_SCATTER)
8115 {
8116 aggr_type = elem_type;
8117 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8118 &bump, &vec_offset);
8119 }
272c6793 8120 else
ab2fc782
RS
8121 {
8122 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8123 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8124 else
8125 aggr_type = vectype;
8126 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8127 }
272c6793 8128
c3a8f964 8129 tree vec_mask = NULL_TREE;
ebfd146a 8130 prev_stmt_info = NULL;
4d694b27 8131 poly_uint64 group_elt = 0;
ebfd146a 8132 for (j = 0; j < ncopies; j++)
b8698a0f 8133 {
272c6793 8134 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8135 if (j == 0)
74bf76ed
JJ
8136 {
8137 bool simd_lane_access_p
8138 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8139 if (simd_lane_access_p
8140 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8141 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8142 && integer_zerop (DR_OFFSET (first_dr))
8143 && integer_zerop (DR_INIT (first_dr))
8144 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8145 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8146 && (alignment_support_scheme == dr_aligned
8147 || alignment_support_scheme == dr_unaligned_supported))
8148 {
8149 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8150 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8151 inv_p = false;
74bf76ed 8152 }
4f0a0218
RB
8153 else if (first_stmt_for_drptr
8154 && first_stmt != first_stmt_for_drptr)
8155 {
8156 dataref_ptr
8157 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8158 at_loop, offset, &dummy, gsi,
8159 &ptr_incr, simd_lane_access_p,
ab2fc782 8160 &inv_p, byte_offset, bump);
4f0a0218
RB
8161 /* Adjust the pointer by the difference to first_stmt. */
8162 data_reference_p ptrdr
8163 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8164 tree diff = fold_convert (sizetype,
8165 size_binop (MINUS_EXPR,
8166 DR_INIT (first_dr),
8167 DR_INIT (ptrdr)));
8168 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8169 stmt, diff);
8170 }
bfaa08b7
RS
8171 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8172 {
8173 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8174 &dataref_ptr, &vec_offset);
8175 inv_p = false;
8176 }
74bf76ed
JJ
8177 else
8178 dataref_ptr
8179 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8180 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8181 simd_lane_access_p, &inv_p,
ab2fc782 8182 byte_offset, bump);
c3a8f964
RS
8183 if (mask)
8184 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8185 mask_vectype);
74bf76ed 8186 }
ebfd146a 8187 else
c3a8f964
RS
8188 {
8189 if (dataref_offset)
8190 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8191 bump);
bfaa08b7 8192 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8193 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8194 vec_offset);
c3a8f964 8195 else
ab2fc782
RS
8196 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8197 stmt, bump);
c3a8f964 8198 if (mask)
929b4411 8199 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8200 }
ebfd146a 8201
0d0293ac 8202 if (grouped_load || slp_perm)
9771b263 8203 dr_chain.create (vec_num);
5ce1ee7f 8204
2de001ee 8205 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8206 {
272c6793
RS
8207 tree vec_array;
8208
8209 vec_array = create_vector_array (vectype, vec_num);
8210
7cfb4d93 8211 tree final_mask = NULL_TREE;
70088b95
RS
8212 if (loop_masks)
8213 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8214 vectype, j);
7cfb4d93
RS
8215 if (vec_mask)
8216 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8217 vec_mask, gsi);
8218
7e11fc7f 8219 gcall *call;
7cfb4d93 8220 if (final_mask)
7e11fc7f
RS
8221 {
8222 /* Emit:
8223 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8224 VEC_MASK). */
8225 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8226 tree alias_ptr = build_int_cst (ref_type, align);
8227 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8228 dataref_ptr, alias_ptr,
7cfb4d93 8229 final_mask);
7e11fc7f
RS
8230 }
8231 else
8232 {
8233 /* Emit:
8234 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8235 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8236 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8237 }
a844293d
RS
8238 gimple_call_set_lhs (call, vec_array);
8239 gimple_call_set_nothrow (call, true);
8240 new_stmt = call;
272c6793 8241 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8242
272c6793
RS
8243 /* Extract each vector into an SSA_NAME. */
8244 for (i = 0; i < vec_num; i++)
ebfd146a 8245 {
272c6793
RS
8246 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8247 vec_array, i);
9771b263 8248 dr_chain.quick_push (new_temp);
272c6793
RS
8249 }
8250
8251 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8252 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8253
8254 /* Record that VEC_ARRAY is now dead. */
8255 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8256 }
8257 else
8258 {
8259 for (i = 0; i < vec_num; i++)
8260 {
7cfb4d93 8261 tree final_mask = NULL_TREE;
70088b95 8262 if (loop_masks
7cfb4d93 8263 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8264 final_mask = vect_get_loop_mask (gsi, loop_masks,
8265 vec_num * ncopies,
7cfb4d93
RS
8266 vectype, vec_num * j + i);
8267 if (vec_mask)
8268 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8269 vec_mask, gsi);
8270
272c6793
RS
8271 if (i > 0)
8272 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8273 stmt, bump);
272c6793
RS
8274
8275 /* 2. Create the vector-load in the loop. */
8276 switch (alignment_support_scheme)
8277 {
8278 case dr_aligned:
8279 case dr_unaligned_supported:
be1ac4ec 8280 {
644ffefd
MJ
8281 unsigned int align, misalign;
8282
bfaa08b7
RS
8283 if (memory_access_type == VMAT_GATHER_SCATTER)
8284 {
8285 tree scale = size_int (gs_info.scale);
8286 gcall *call;
70088b95 8287 if (loop_masks)
bfaa08b7
RS
8288 call = gimple_build_call_internal
8289 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8290 vec_offset, scale, final_mask);
8291 else
8292 call = gimple_build_call_internal
8293 (IFN_GATHER_LOAD, 3, dataref_ptr,
8294 vec_offset, scale);
8295 gimple_call_set_nothrow (call, true);
8296 new_stmt = call;
8297 data_ref = NULL_TREE;
8298 break;
8299 }
8300
f702e7d4 8301 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8302 if (alignment_support_scheme == dr_aligned)
8303 {
8304 gcc_assert (aligned_access_p (first_dr));
644ffefd 8305 misalign = 0;
272c6793
RS
8306 }
8307 else if (DR_MISALIGNMENT (first_dr) == -1)
8308 {
25f68d90 8309 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8310 misalign = 0;
272c6793
RS
8311 }
8312 else
c3a8f964 8313 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8314 if (dataref_offset == NULL_TREE
8315 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8316 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8317 align, misalign);
c3a8f964 8318
7cfb4d93 8319 if (final_mask)
c3a8f964
RS
8320 {
8321 align = least_bit_hwi (misalign | align);
8322 tree ptr = build_int_cst (ref_type, align);
8323 gcall *call
8324 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8325 dataref_ptr, ptr,
7cfb4d93 8326 final_mask);
c3a8f964
RS
8327 gimple_call_set_nothrow (call, true);
8328 new_stmt = call;
8329 data_ref = NULL_TREE;
8330 }
8331 else
8332 {
8333 data_ref
8334 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8335 dataref_offset
8336 ? dataref_offset
8337 : build_int_cst (ref_type, 0));
8338 if (alignment_support_scheme == dr_aligned)
8339 ;
8340 else if (DR_MISALIGNMENT (first_dr) == -1)
8341 TREE_TYPE (data_ref)
8342 = build_aligned_type (TREE_TYPE (data_ref),
8343 align * BITS_PER_UNIT);
8344 else
8345 TREE_TYPE (data_ref)
8346 = build_aligned_type (TREE_TYPE (data_ref),
8347 TYPE_ALIGN (elem_type));
8348 }
272c6793 8349 break;
be1ac4ec 8350 }
272c6793 8351 case dr_explicit_realign:
267d3070 8352 {
272c6793 8353 tree ptr, bump;
272c6793 8354
d88981fc 8355 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8356
8357 if (compute_in_loop)
8358 msq = vect_setup_realignment (first_stmt, gsi,
8359 &realignment_token,
8360 dr_explicit_realign,
8361 dataref_ptr, NULL);
8362
aed93b23
RB
8363 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8364 ptr = copy_ssa_name (dataref_ptr);
8365 else
8366 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8367 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8368 new_stmt = gimple_build_assign
8369 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8370 build_int_cst
8371 (TREE_TYPE (dataref_ptr),
f702e7d4 8372 -(HOST_WIDE_INT) align));
272c6793
RS
8373 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8374 data_ref
8375 = build2 (MEM_REF, vectype, ptr,
44fc7854 8376 build_int_cst (ref_type, 0));
19986382 8377 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8378 vec_dest = vect_create_destination_var (scalar_dest,
8379 vectype);
8380 new_stmt = gimple_build_assign (vec_dest, data_ref);
8381 new_temp = make_ssa_name (vec_dest, new_stmt);
8382 gimple_assign_set_lhs (new_stmt, new_temp);
8383 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8384 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8385 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8386 msq = new_temp;
8387
d88981fc 8388 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8389 TYPE_SIZE_UNIT (elem_type));
d88981fc 8390 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8391 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8392 new_stmt = gimple_build_assign
8393 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8394 build_int_cst
f702e7d4 8395 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8396 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8397 gimple_assign_set_lhs (new_stmt, ptr);
8398 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8399 data_ref
8400 = build2 (MEM_REF, vectype, ptr,
44fc7854 8401 build_int_cst (ref_type, 0));
272c6793 8402 break;
267d3070 8403 }
272c6793 8404 case dr_explicit_realign_optimized:
f702e7d4
RS
8405 {
8406 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8407 new_temp = copy_ssa_name (dataref_ptr);
8408 else
8409 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8410 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8411 new_stmt = gimple_build_assign
8412 (new_temp, BIT_AND_EXPR, dataref_ptr,
8413 build_int_cst (TREE_TYPE (dataref_ptr),
8414 -(HOST_WIDE_INT) align));
8415 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8416 data_ref
8417 = build2 (MEM_REF, vectype, new_temp,
8418 build_int_cst (ref_type, 0));
8419 break;
8420 }
272c6793
RS
8421 default:
8422 gcc_unreachable ();
8423 }
ebfd146a 8424 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8425 /* DATA_REF is null if we've already built the statement. */
8426 if (data_ref)
19986382
RB
8427 {
8428 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8429 new_stmt = gimple_build_assign (vec_dest, data_ref);
8430 }
ebfd146a 8431 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8432 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8433 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8434
272c6793
RS
8435 /* 3. Handle explicit realignment if necessary/supported.
8436 Create in loop:
8437 vec_dest = realign_load (msq, lsq, realignment_token) */
8438 if (alignment_support_scheme == dr_explicit_realign_optimized
8439 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8440 {
272c6793
RS
8441 lsq = gimple_assign_lhs (new_stmt);
8442 if (!realignment_token)
8443 realignment_token = dataref_ptr;
8444 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8445 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8446 msq, lsq, realignment_token);
272c6793
RS
8447 new_temp = make_ssa_name (vec_dest, new_stmt);
8448 gimple_assign_set_lhs (new_stmt, new_temp);
8449 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8450
8451 if (alignment_support_scheme == dr_explicit_realign_optimized)
8452 {
8453 gcc_assert (phi);
8454 if (i == vec_num - 1 && j == ncopies - 1)
8455 add_phi_arg (phi, lsq,
8456 loop_latch_edge (containing_loop),
9e227d60 8457 UNKNOWN_LOCATION);
272c6793
RS
8458 msq = lsq;
8459 }
ebfd146a 8460 }
ebfd146a 8461
59fd17e3
RB
8462 /* 4. Handle invariant-load. */
8463 if (inv_p && !bb_vinfo)
8464 {
59fd17e3 8465 gcc_assert (!grouped_load);
d1417442
JJ
8466 /* If we have versioned for aliasing or the loop doesn't
8467 have any data dependencies that would preclude this,
8468 then we are sure this is a loop invariant load and
8469 thus we can insert it on the preheader edge. */
8470 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8471 && !nested_in_vect_loop
6b916b36 8472 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8473 {
8474 if (dump_enabled_p ())
8475 {
8476 dump_printf_loc (MSG_NOTE, vect_location,
8477 "hoisting out of the vectorized "
8478 "loop: ");
8479 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8480 }
b731b390 8481 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8482 gsi_insert_on_edge_immediate
8483 (loop_preheader_edge (loop),
8484 gimple_build_assign (tem,
8485 unshare_expr
8486 (gimple_assign_rhs1 (stmt))));
8487 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5 8488 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4fbeb363 8489 vinfo->add_stmt (new_stmt);
a0e35eb0
RB
8490 }
8491 else
8492 {
8493 gimple_stmt_iterator gsi2 = *gsi;
8494 gsi_next (&gsi2);
8495 new_temp = vect_init_vector (stmt, scalar_dest,
8496 vectype, &gsi2);
34cd48e5 8497 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8498 }
59fd17e3
RB
8499 }
8500
62da9e14 8501 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8502 {
aec7ae7d
JJ
8503 tree perm_mask = perm_mask_for_reverse (vectype);
8504 new_temp = permute_vec_elements (new_temp, new_temp,
8505 perm_mask, stmt, gsi);
ebfd146a
IR
8506 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8507 }
267d3070 8508
272c6793 8509 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8510 vect_transform_grouped_load (). */
8511 if (grouped_load || slp_perm)
9771b263 8512 dr_chain.quick_push (new_temp);
267d3070 8513
272c6793
RS
8514 /* Store vector loads in the corresponding SLP_NODE. */
8515 if (slp && !slp_perm)
9771b263 8516 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8517
8518 /* With SLP permutation we load the gaps as well, without
8519 we need to skip the gaps after we manage to fully load
2c53b149 8520 all elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8521 group_elt += nunits;
d9f21f6a
RS
8522 if (maybe_ne (group_gap_adj, 0U)
8523 && !slp_perm
8524 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8525 {
d9f21f6a
RS
8526 poly_wide_int bump_val
8527 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8528 * group_gap_adj);
8e6cdc90 8529 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8530 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8531 stmt, bump);
8532 group_elt = 0;
8533 }
272c6793 8534 }
9b999e8c
RB
8535 /* Bump the vector pointer to account for a gap or for excess
8536 elements loaded for a permuted SLP load. */
d9f21f6a 8537 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8538 {
d9f21f6a
RS
8539 poly_wide_int bump_val
8540 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8541 * group_gap_adj);
8e6cdc90 8542 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8543 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8544 stmt, bump);
8545 }
ebfd146a
IR
8546 }
8547
8548 if (slp && !slp_perm)
8549 continue;
8550
8551 if (slp_perm)
8552 {
29afecdf 8553 unsigned n_perms;
01d8bf07 8554 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8555 slp_node_instance, false,
8556 &n_perms))
ebfd146a 8557 {
9771b263 8558 dr_chain.release ();
ebfd146a
IR
8559 return false;
8560 }
8561 }
8562 else
8563 {
0d0293ac 8564 if (grouped_load)
ebfd146a 8565 {
2de001ee 8566 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8567 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8568 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8569 }
8570 else
8571 {
8572 if (j == 0)
8573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8574 else
8575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8576 prev_stmt_info = vinfo_for_stmt (new_stmt);
8577 }
8578 }
9771b263 8579 dr_chain.release ();
ebfd146a
IR
8580 }
8581
ebfd146a
IR
8582 return true;
8583}
8584
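/* Illustrative sketch, not taken from GCC: a source-level loop that takes
   the VMAT_GATHER_SCATTER path handled in vectorizable_load above; the
   loads are indexed through an offset vector instead of a linear pointer
   and become IFN_GATHER_LOAD (or IFN_MASK_GATHER_LOAD under loop masking)
   where the target supports gathers.  The function name is hypothetical.  */
static void
gather_load_sketch (float *restrict dst, const float *restrict src,
                    const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[idx[i]];
}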
8585/* Function vect_is_simple_cond.
b8698a0f 8586
ebfd146a IR 8587 Input:
8588 LOOP - the loop that is being vectorized.
8589 COND - Condition that is checked for simple use.
8590
e9e1d143
RG
8591 Output:
8592 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8593 *DTS - The def types for the arguments of the comparison
e9e1d143 8594
ebfd146a
IR
8595 Returns whether a COND can be vectorized. Checks whether the
8596 condition operands are supportable using vect_is_simple_use. */
8597
87aab9b2 8598static bool
4fc5ebf1 8599vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8600 tree *comp_vectype, enum vect_def_type *dts,
8601 tree vectype)
ebfd146a
IR
8602{
8603 tree lhs, rhs;
e9e1d143 8604 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8605
a414c77f
IE
8606 /* Mask case. */
8607 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8608 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f 8609 {
894dd753 8610 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
a414c77f
IE
8611 || !*comp_vectype
8612 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8613 return false;
8614 return true;
8615 }
8616
ebfd146a
IR
8617 if (!COMPARISON_CLASS_P (cond))
8618 return false;
8619
8620 lhs = TREE_OPERAND (cond, 0);
8621 rhs = TREE_OPERAND (cond, 1);
8622
8623 if (TREE_CODE (lhs) == SSA_NAME)
8624 {
894dd753 8625 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
ebfd146a
IR
8626 return false;
8627 }
4fc5ebf1
JG
8628 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8629 || TREE_CODE (lhs) == FIXED_CST)
8630 dts[0] = vect_constant_def;
8631 else
ebfd146a
IR
8632 return false;
8633
8634 if (TREE_CODE (rhs) == SSA_NAME)
8635 {
894dd753 8636 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
ebfd146a
IR
8637 return false;
8638 }
4fc5ebf1
JG
8639 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8640 || TREE_CODE (rhs) == FIXED_CST)
8641 dts[1] = vect_constant_def;
8642 else
ebfd146a
IR
8643 return false;
8644
28b33016 8645 if (vectype1 && vectype2
928686b1
RS
8646 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8647 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8648 return false;
8649
e9e1d143 8650 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8651 /* Invariant comparison. */
4515e413 8652 if (! *comp_vectype && vectype)
8da4c8d8
RB
8653 {
8654 tree scalar_type = TREE_TYPE (lhs);
8655 /* If we can widen the comparison to match vectype do so. */
8656 if (INTEGRAL_TYPE_P (scalar_type)
8657 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8658 TYPE_SIZE (TREE_TYPE (vectype))))
8659 scalar_type = build_nonstandard_integer_type
8660 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8661 TYPE_UNSIGNED (scalar_type));
8662 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8663 }
8664
ebfd146a
IR
8665 return true;
8666}
8667
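/* Illustrative sketch, not taken from GCC: a source-level statement whose
   condition vect_is_simple_cond above accepts and which
   vectorizable_condition (below) turns into a VEC_COND_EXPR.  The function
   name is hypothetical.  */
static void
cond_expr_sketch (int *restrict r, const int *restrict a,
                  const int *restrict b, const int *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = a[i] < b[i] ? b[i] : c[i];   /* a[i] < b[i] is the COND operand */
}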
8668/* vectorizable_condition.
8669
b8698a0f L 8670 Check if STMT is a conditional modify expression that can be vectorized.
8671 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8672 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8673 at GSI.
8674
8675 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8676 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 8677 else clause if it is 2).
ebfd146a
IR
8678
8679 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8680
4bbe8262 8681bool
355fe088
TS
8682vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8683 gimple **vec_stmt, tree reduc_def, int reduc_index,
68435eb2 8684 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8685{
8686 tree scalar_dest = NULL_TREE;
8687 tree vec_dest = NULL_TREE;
01216d27
JJ
8688 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8689 tree then_clause, else_clause;
ebfd146a 8690 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8691 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8692 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8693 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8694 tree vec_compare;
ebfd146a
IR
8695 tree new_temp;
8696 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8697 enum vect_def_type dts[4]
8698 = {vect_unknown_def_type, vect_unknown_def_type,
8699 vect_unknown_def_type, vect_unknown_def_type};
8700 int ndts = 4;
f7e531cf 8701 int ncopies;
01216d27 8702 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8703 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8704 int i, j;
8705 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8706 vec<tree> vec_oprnds0 = vNULL;
8707 vec<tree> vec_oprnds1 = vNULL;
8708 vec<tree> vec_oprnds2 = vNULL;
8709 vec<tree> vec_oprnds3 = vNULL;
74946978 8710 tree vec_cmp_type;
a414c77f 8711 bool masked = false;
b8698a0f 8712
f7e531cf
IR
8713 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8714 return false;
8715
bb6c2b68
RS
8716 vect_reduction_type reduction_type
8717 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8718 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8719 {
8720 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8721 return false;
ebfd146a 8722
af29617a
AH
8723 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8724 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8725 && reduc_def))
8726 return false;
ebfd146a 8727
af29617a
AH
8728 /* FORNOW: not yet supported. */
8729 if (STMT_VINFO_LIVE_P (stmt_info))
8730 {
8731 if (dump_enabled_p ())
8732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8733 "value used after loop.\n");
8734 return false;
8735 }
ebfd146a
IR
8736 }
8737
8738 /* Is vectorizable conditional operation? */
8739 if (!is_gimple_assign (stmt))
8740 return false;
8741
8742 code = gimple_assign_rhs_code (stmt);
8743
8744 if (code != COND_EXPR)
8745 return false;
8746
465c8c19 8747 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8748 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8749
fce57248 8750 if (slp_node)
465c8c19
JJ
8751 ncopies = 1;
8752 else
e8f142e2 8753 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8754
8755 gcc_assert (ncopies >= 1);
8756 if (reduc_index && ncopies > 1)
8757 return false; /* FORNOW */
8758
4e71066d
RG
8759 cond_expr = gimple_assign_rhs1 (stmt);
8760 then_clause = gimple_assign_rhs2 (stmt);
8761 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8762
4fc5ebf1 8763 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8764 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8765 || !comp_vectype)
ebfd146a
IR
8766 return false;
8767
894dd753 8768 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
2947d3b2 8769 return false;
894dd753 8770 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
ebfd146a 8771 return false;
2947d3b2
IE
8772
8773 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8774 return false;
8775
8776 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8777 return false;
8778
28b33016
IE
8779 masked = !COMPARISON_CLASS_P (cond_expr);
8780 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8781
74946978
MP
8782 if (vec_cmp_type == NULL_TREE)
8783 return false;
784fb9b3 8784
01216d27
JJ
8785 cond_code = TREE_CODE (cond_expr);
8786 if (!masked)
8787 {
8788 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8789 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8790 }
8791
8792 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8793 {
8794 /* Boolean values may have another representation in vectors
8795 and therefore we prefer bit operations over comparison for
8796 them (which also works for scalar masks). We store opcodes
8797 to use in bitop1 and bitop2. Statement is vectorized as
8798 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8799 depending on bitop1 and bitop2 arity. */
8800 switch (cond_code)
8801 {
8802 case GT_EXPR:
8803 bitop1 = BIT_NOT_EXPR;
8804 bitop2 = BIT_AND_EXPR;
8805 break;
8806 case GE_EXPR:
8807 bitop1 = BIT_NOT_EXPR;
8808 bitop2 = BIT_IOR_EXPR;
8809 break;
8810 case LT_EXPR:
8811 bitop1 = BIT_NOT_EXPR;
8812 bitop2 = BIT_AND_EXPR;
8813 std::swap (cond_expr0, cond_expr1);
8814 break;
8815 case LE_EXPR:
8816 bitop1 = BIT_NOT_EXPR;
8817 bitop2 = BIT_IOR_EXPR;
8818 std::swap (cond_expr0, cond_expr1);
8819 break;
8820 case NE_EXPR:
8821 bitop1 = BIT_XOR_EXPR;
8822 break;
8823 case EQ_EXPR:
8824 bitop1 = BIT_XOR_EXPR;
8825 bitop2 = BIT_NOT_EXPR;
8826 break;
8827 default:
8828 return false;
8829 }
8830 cond_code = SSA_NAME;
8831 }
8832
b8698a0f 8833 if (!vec_stmt)
ebfd146a 8834 {
01216d27
JJ
8835 if (bitop1 != NOP_EXPR)
8836 {
8837 machine_mode mode = TYPE_MODE (comp_vectype);
8838 optab optab;
8839
8840 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8841 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8842 return false;
8843
8844 if (bitop2 != NOP_EXPR)
8845 {
8846 optab = optab_for_tree_code (bitop2, comp_vectype,
8847 optab_default);
8848 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8849 return false;
8850 }
8851 }
4fc5ebf1
JG
8852 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8853 cond_code))
8854 {
68435eb2
RB
8855 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8856 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8857 cost_vec);
4fc5ebf1
JG
8858 return true;
8859 }
8860 return false;
ebfd146a
IR
8861 }
8862
f7e531cf
IR
8863 /* Transform. */
8864
8865 if (!slp_node)
8866 {
9771b263
DN
8867 vec_oprnds0.create (1);
8868 vec_oprnds1.create (1);
8869 vec_oprnds2.create (1);
8870 vec_oprnds3.create (1);
f7e531cf 8871 }
ebfd146a
IR
8872
8873 /* Handle def. */
8874 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8875 if (reduction_type != EXTRACT_LAST_REDUCTION)
8876 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8877
8878 /* Handle cond expr. */
a855b1b1
MM
8879 for (j = 0; j < ncopies; j++)
8880 {
bb6c2b68 8881 gimple *new_stmt = NULL;
a855b1b1
MM
8882 if (j == 0)
8883 {
f7e531cf
IR
8884 if (slp_node)
8885 {
00f96dc9
TS
8886 auto_vec<tree, 4> ops;
8887 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8888
a414c77f 8889 if (masked)
01216d27 8890 ops.safe_push (cond_expr);
a414c77f
IE
8891 else
8892 {
01216d27
JJ
8893 ops.safe_push (cond_expr0);
8894 ops.safe_push (cond_expr1);
a414c77f 8895 }
9771b263
DN
8896 ops.safe_push (then_clause);
8897 ops.safe_push (else_clause);
306b0c92 8898 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8899 vec_oprnds3 = vec_defs.pop ();
8900 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8901 if (!masked)
8902 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8903 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8904 }
8905 else
8906 {
a414c77f
IE
8907 if (masked)
8908 {
8909 vec_cond_lhs
8910 = vect_get_vec_def_for_operand (cond_expr, stmt,
8911 comp_vectype);
894dd753 8912 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
a414c77f
IE
8913 }
8914 else
8915 {
01216d27
JJ
8916 vec_cond_lhs
8917 = vect_get_vec_def_for_operand (cond_expr0,
8918 stmt, comp_vectype);
894dd753 8919 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
01216d27
JJ
8920
8921 vec_cond_rhs
8922 = vect_get_vec_def_for_operand (cond_expr1,
8923 stmt, comp_vectype);
894dd753 8924 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
a414c77f 8925 }
f7e531cf
IR
8926 if (reduc_index == 1)
8927 vec_then_clause = reduc_def;
8928 else
8929 {
8930 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241 8931 stmt);
894dd753 8932 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
f7e531cf
IR
8933 }
8934 if (reduc_index == 2)
8935 vec_else_clause = reduc_def;
8936 else
8937 {
8938 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241 8939 stmt);
894dd753 8940 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
f7e531cf 8941 }
a855b1b1
MM
8942 }
8943 }
8944 else
8945 {
a414c77f
IE
8946 vec_cond_lhs
8947 = vect_get_vec_def_for_stmt_copy (dts[0],
8948 vec_oprnds0.pop ());
8949 if (!masked)
8950 vec_cond_rhs
8951 = vect_get_vec_def_for_stmt_copy (dts[1],
8952 vec_oprnds1.pop ());
8953
a855b1b1 8954 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8955 vec_oprnds2.pop ());
a855b1b1 8956 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8957 vec_oprnds3.pop ());
f7e531cf
IR
8958 }
8959
8960 if (!slp_node)
8961 {
9771b263 8962 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8963 if (!masked)
8964 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8965 vec_oprnds2.quick_push (vec_then_clause);
8966 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8967 }
8968
9dc3f7de 8969 /* Arguments are ready. Create the new vector stmt. */
9771b263 8970 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8971 {
9771b263
DN
8972 vec_then_clause = vec_oprnds2[i];
8973 vec_else_clause = vec_oprnds3[i];
a855b1b1 8974
a414c77f
IE
8975 if (masked)
8976 vec_compare = vec_cond_lhs;
8977 else
8978 {
8979 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8980 if (bitop1 == NOP_EXPR)
8981 vec_compare = build2 (cond_code, vec_cmp_type,
8982 vec_cond_lhs, vec_cond_rhs);
8983 else
8984 {
8985 new_temp = make_ssa_name (vec_cmp_type);
8986 if (bitop1 == BIT_NOT_EXPR)
8987 new_stmt = gimple_build_assign (new_temp, bitop1,
8988 vec_cond_rhs);
8989 else
8990 new_stmt
8991 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8992 vec_cond_rhs);
8993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8994 if (bitop2 == NOP_EXPR)
8995 vec_compare = new_temp;
8996 else if (bitop2 == BIT_NOT_EXPR)
8997 {
8998 /* Instead of doing ~x ? y : z do x ? z : y. */
8999 vec_compare = new_temp;
9000 std::swap (vec_then_clause, vec_else_clause);
9001 }
9002 else
9003 {
9004 vec_compare = make_ssa_name (vec_cmp_type);
9005 new_stmt
9006 = gimple_build_assign (vec_compare, bitop2,
9007 vec_cond_lhs, new_temp);
9008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9009 }
9010 }
a414c77f 9011 }
bb6c2b68
RS
9012 if (reduction_type == EXTRACT_LAST_REDUCTION)
9013 {
9014 if (!is_gimple_val (vec_compare))
9015 {
9016 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9017 new_stmt = gimple_build_assign (vec_compare_name,
9018 vec_compare);
9019 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9020 vec_compare = vec_compare_name;
9021 }
9022 gcc_assert (reduc_index == 2);
9023 new_stmt = gimple_build_call_internal
9024 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9025 vec_then_clause);
9026 gimple_call_set_lhs (new_stmt, scalar_dest);
9027 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9028 if (stmt == gsi_stmt (*gsi))
9029 vect_finish_replace_stmt (stmt, new_stmt);
9030 else
9031 {
9032 /* In this case we're moving the definition to later in the
9033 block. That doesn't matter because the only uses of the
9034 lhs are in phi statements. */
9035 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9036 gsi_remove (&old_gsi, true);
9037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9038 }
9039 }
9040 else
9041 {
9042 new_temp = make_ssa_name (vec_dest);
9043 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9044 vec_compare, vec_then_clause,
9045 vec_else_clause);
9046 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9047 }
f7e531cf 9048 if (slp_node)
9771b263 9049 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
9050 }
9051
9052 if (slp_node)
9053 continue;
9054
9055 if (j == 0)
9056 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9057 else
9058 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9059
9060 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 9061 }
b8698a0f 9062
9771b263
DN
9063 vec_oprnds0.release ();
9064 vec_oprnds1.release ();
9065 vec_oprnds2.release ();
9066 vec_oprnds3.release ();
f7e531cf 9067
ebfd146a
IR
9068 return true;
9069}
9070
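/* Illustrative sketch, not taken from GCC: the bitop1/bitop2 rewrites that
   vectorizable_condition above and vectorizable_comparison below use for
   boolean operands, spelled out for single-bit values (the vectorized form
   applies them lane-wise to mask vectors; LT and LE are handled by swapping
   the operands first).  The helper names are hypothetical.  */
static inline _Bool bool_gt_sketch (_Bool a, _Bool b) { return a & !b; }   /* GT: rhs1 & ~rhs2   */
static inline _Bool bool_ge_sketch (_Bool a, _Bool b) { return a | !b; }   /* GE: rhs1 | ~rhs2   */
static inline _Bool bool_ne_sketch (_Bool a, _Bool b) { return a ^ b; }    /* NE: rhs1 ^ rhs2    */
static inline _Bool bool_eq_sketch (_Bool a, _Bool b) { return !(a ^ b); } /* EQ: ~(rhs1 ^ rhs2) */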
42fd8198 IE 9071 /* vectorizable_comparison.
9072
9073 Check if STMT is a comparison expression that can be vectorized.
9074 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9075 comparison, put it in VEC_STMT, and insert it at GSI.
9076
9077 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9078
fce57248 9079static bool
42fd8198
IE
9080vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9081 gimple **vec_stmt, tree reduc_def,
68435eb2 9082 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9083{
9084 tree lhs, rhs1, rhs2;
9085 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9086 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9087 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9088 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9089 tree new_temp;
9090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9091 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9092 int ndts = 2;
928686b1 9093 poly_uint64 nunits;
42fd8198 9094 int ncopies;
49e76ff1 9095 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9096 stmt_vec_info prev_stmt_info = NULL;
9097 int i, j;
9098 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9099 vec<tree> vec_oprnds0 = vNULL;
9100 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9101 tree mask_type;
9102 tree mask;
9103
c245362b
IE
9104 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9105 return false;
9106
30480bcd 9107 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9108 return false;
9109
9110 mask_type = vectype;
9111 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9112
fce57248 9113 if (slp_node)
42fd8198
IE
9114 ncopies = 1;
9115 else
e8f142e2 9116 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9117
9118 gcc_assert (ncopies >= 1);
42fd8198
IE
9119 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9120 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9121 && reduc_def))
9122 return false;
9123
9124 if (STMT_VINFO_LIVE_P (stmt_info))
9125 {
9126 if (dump_enabled_p ())
9127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9128 "value used after loop.\n");
9129 return false;
9130 }
9131
9132 if (!is_gimple_assign (stmt))
9133 return false;
9134
9135 code = gimple_assign_rhs_code (stmt);
9136
9137 if (TREE_CODE_CLASS (code) != tcc_comparison)
9138 return false;
9139
9140 rhs1 = gimple_assign_rhs1 (stmt);
9141 rhs2 = gimple_assign_rhs2 (stmt);
9142
894dd753 9143 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
42fd8198
IE
9144 return false;
9145
894dd753 9146 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
42fd8198
IE
9147 return false;
9148
9149 if (vectype1 && vectype2
928686b1
RS
9150 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9151 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9152 return false;
9153
9154 vectype = vectype1 ? vectype1 : vectype2;
9155
9156 /* Invariant comparison. */
9157 if (!vectype)
9158 {
69a9a66f 9159 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9160 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9161 return false;
9162 }
928686b1 9163 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9164 return false;
9165
49e76ff1
IE
9166 /* Can't compare mask and non-mask types. */
9167 if (vectype1 && vectype2
9168 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9169 return false;
9170
9171 /* Boolean values may have another representation in vectors
9172 and therefore we prefer bit operations over comparison for
9173 them (which also works for scalar masks). We store opcodes
9174 to use in bitop1 and bitop2. Statement is vectorized as
9175 BITOP2 (rhs1 BITOP1 rhs2) or
9176 rhs1 BITOP2 (BITOP1 rhs2)
9177 depending on bitop1 and bitop2 arity. */
9178 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9179 {
9180 if (code == GT_EXPR)
9181 {
9182 bitop1 = BIT_NOT_EXPR;
9183 bitop2 = BIT_AND_EXPR;
9184 }
9185 else if (code == GE_EXPR)
9186 {
9187 bitop1 = BIT_NOT_EXPR;
9188 bitop2 = BIT_IOR_EXPR;
9189 }
9190 else if (code == LT_EXPR)
9191 {
9192 bitop1 = BIT_NOT_EXPR;
9193 bitop2 = BIT_AND_EXPR;
9194 std::swap (rhs1, rhs2);
264d951a 9195 std::swap (dts[0], dts[1]);
49e76ff1
IE
9196 }
9197 else if (code == LE_EXPR)
9198 {
9199 bitop1 = BIT_NOT_EXPR;
9200 bitop2 = BIT_IOR_EXPR;
9201 std::swap (rhs1, rhs2);
264d951a 9202 std::swap (dts[0], dts[1]);
49e76ff1
IE
9203 }
9204 else
9205 {
9206 bitop1 = BIT_XOR_EXPR;
9207 if (code == EQ_EXPR)
9208 bitop2 = BIT_NOT_EXPR;
9209 }
9210 }
9211
42fd8198
IE
9212 if (!vec_stmt)
9213 {
49e76ff1 9214 if (bitop1 == NOP_EXPR)
68435eb2
RB
9215 {
9216 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9217 return false;
9218 }
49e76ff1
IE
9219 else
9220 {
9221 machine_mode mode = TYPE_MODE (vectype);
9222 optab optab;
9223
9224 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9225 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9226 return false;
9227
9228 if (bitop2 != NOP_EXPR)
9229 {
9230 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9231 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9232 return false;
9233 }
49e76ff1 9234 }
68435eb2
RB
9235
9236 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9237 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9238 dts, ndts, slp_node, cost_vec);
9239 return true;
42fd8198
IE
9240 }
9241
9242 /* Transform. */
9243 if (!slp_node)
9244 {
9245 vec_oprnds0.create (1);
9246 vec_oprnds1.create (1);
9247 }
9248
9249 /* Handle def. */
9250 lhs = gimple_assign_lhs (stmt);
9251 mask = vect_create_destination_var (lhs, mask_type);
9252
9253 /* Handle cmp expr. */
9254 for (j = 0; j < ncopies; j++)
9255 {
9256 gassign *new_stmt = NULL;
9257 if (j == 0)
9258 {
9259 if (slp_node)
9260 {
9261 auto_vec<tree, 2> ops;
9262 auto_vec<vec<tree>, 2> vec_defs;
9263
9264 ops.safe_push (rhs1);
9265 ops.safe_push (rhs2);
306b0c92 9266 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9267 vec_oprnds1 = vec_defs.pop ();
9268 vec_oprnds0 = vec_defs.pop ();
9269 }
9270 else
9271 {
e4af0bc4
IE
9272 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9273 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9274 }
9275 }
9276 else
9277 {
9278 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9279 vec_oprnds0.pop ());
9280 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9281 vec_oprnds1.pop ());
9282 }
9283
9284 if (!slp_node)
9285 {
9286 vec_oprnds0.quick_push (vec_rhs1);
9287 vec_oprnds1.quick_push (vec_rhs2);
9288 }
9289
9290 /* Arguments are ready. Create the new vector stmt. */
9291 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9292 {
9293 vec_rhs2 = vec_oprnds1[i];
9294
9295 new_temp = make_ssa_name (mask);
49e76ff1
IE
9296 if (bitop1 == NOP_EXPR)
9297 {
9298 new_stmt = gimple_build_assign (new_temp, code,
9299 vec_rhs1, vec_rhs2);
9300 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9301 }
9302 else
9303 {
9304 if (bitop1 == BIT_NOT_EXPR)
9305 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9306 else
9307 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9308 vec_rhs2);
9309 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9310 if (bitop2 != NOP_EXPR)
9311 {
9312 tree res = make_ssa_name (mask);
9313 if (bitop2 == BIT_NOT_EXPR)
9314 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9315 else
9316 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9317 new_temp);
9318 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9319 }
9320 }
42fd8198
IE
9321 if (slp_node)
9322 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9323 }
9324
9325 if (slp_node)
9326 continue;
9327
9328 if (j == 0)
9329 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9330 else
9331 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9332
9333 prev_stmt_info = vinfo_for_stmt (new_stmt);
9334 }
9335
9336 vec_oprnds0.release ();
9337 vec_oprnds1.release ();
9338
9339 return true;
9340}
ebfd146a 9341
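/* Illustrative sketch, not taken from GCC: a source-level comparison that
   vectorizable_comparison above handles; its result is a boolean, so the
   vectorized statement defines a VECTOR_BOOLEAN_TYPE_P mask rather than an
   ordinary integer vector.  The function name is hypothetical.  */
static int
compare_to_mask_sketch (const int *restrict a, const int *restrict b, int n)
{
  int count = 0;
  for (int i = 0; i < n; i++)
    count += a[i] < b[i];
  return count;
}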
68a0f2ff RS 9342 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9343 can handle all live statements in the node. Otherwise return true
9344 if STMT is not live or if vectorizable_live_operation can handle it.
9345 GSI and VEC_STMT are as for vectorizable_live_operation. */
9346
9347static bool
9348can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
9349 slp_tree slp_node, gimple **vec_stmt,
9350 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9351{
9352 if (slp_node)
9353 {
9354 gimple *slp_stmt;
9355 unsigned int i;
9356 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9357 {
9358 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9359 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9360 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
68435eb2 9361 vec_stmt, cost_vec))
68a0f2ff
RS
9362 return false;
9363 }
9364 }
9365 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9366 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9367 cost_vec))
68a0f2ff
RS
9368 return false;
9369
9370 return true;
9371}
9372
8644a673 9373/* Make sure the statement is vectorizable. */
ebfd146a
IR
9374
9375bool
891ad31c 9376vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9377 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9378{
8644a673 9379 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6585ff8f 9380 vec_info *vinfo = stmt_info->vinfo;
a70d6342 9381 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9382 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9383 bool ok;
355fe088 9384 gimple *pattern_stmt;
363477c0 9385 gimple_seq pattern_def_seq;
ebfd146a 9386
73fbfcad 9387 if (dump_enabled_p ())
ebfd146a 9388 {
78c60e3d
SS
9389 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9390 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9391 }
ebfd146a 9392
1825a1f3 9393 if (gimple_has_volatile_ops (stmt))
b8698a0f 9394 {
73fbfcad 9395 if (dump_enabled_p ())
78c60e3d 9396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9397 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9398
9399 return false;
9400 }
b8698a0f 9401
d54a098e
RS
9402 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9403 && node == NULL
9404 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9405 {
9406 gimple_stmt_iterator si;
9407
9408 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9409 {
9410 gimple *pattern_def_stmt = gsi_stmt (si);
6585ff8f
RS
9411 stmt_vec_info pattern_def_stmt_info
9412 = vinfo->lookup_stmt (gsi_stmt (si));
9413 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9414 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
d54a098e
RS
9415 {
9416 /* Analyze def stmt of STMT if it's a pattern stmt. */
9417 if (dump_enabled_p ())
9418 {
9419 dump_printf_loc (MSG_NOTE, vect_location,
9420 "==> examining pattern def statement: ");
9421 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9422 }
9423
9424 if (!vect_analyze_stmt (pattern_def_stmt,
9425 need_to_vectorize, node, node_instance,
9426 cost_vec))
9427 return false;
9428 }
9429 }
9430 }
9431
b8698a0f 9432 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9433 to include:
9434 - the COND_EXPR which is the loop exit condition
9435 - any LABEL_EXPRs in the loop
b8698a0f 9436 - computations that are used only for array indexing or loop control.
8644a673 9437 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9438 instance, therefore, all the statements are relevant.
ebfd146a 9439
d092494c 9440 Pattern statement needs to be analyzed instead of the original statement
83197f37 9441 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9442 statements. In basic blocks we are called from some SLP instance
9443 traversal, don't analyze pattern stmts instead, the pattern stmts
9444 already will be part of SLP instance. */
83197f37
IR
9445
9446 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9447 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9448 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9449 {
9d5e7640 9450 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9451 && pattern_stmt
9d5e7640
IR
9452 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9453 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9454 {
83197f37 9455 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9456 stmt = pattern_stmt;
9457 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9458 if (dump_enabled_p ())
9d5e7640 9459 {
78c60e3d
SS
9460 dump_printf_loc (MSG_NOTE, vect_location,
9461 "==> examining pattern statement: ");
9462 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9463 }
9464 }
9465 else
9466 {
73fbfcad 9467 if (dump_enabled_p ())
e645e942 9468 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9469
9d5e7640
IR
9470 return true;
9471 }
8644a673 9472 }
83197f37 9473 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9474 && node == NULL
83197f37
IR
9475 && pattern_stmt
9476 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9477 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9478 {
9479 /* Analyze PATTERN_STMT too. */
73fbfcad 9480 if (dump_enabled_p ())
83197f37 9481 {
78c60e3d
SS
9482 dump_printf_loc (MSG_NOTE, vect_location,
9483 "==> examining pattern statement: ");
9484 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9485 }
9486
891ad31c 9487 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
68435eb2 9488 node_instance, cost_vec))
83197f37
IR
9489 return false;
9490 }
ebfd146a 9491
8644a673
IR
9492 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9493 {
9494 case vect_internal_def:
9495 break;
ebfd146a 9496
8644a673 9497 case vect_reduction_def:
7c5222ff 9498 case vect_nested_cycle:
14a61437
RB
9499 gcc_assert (!bb_vinfo
9500 && (relevance == vect_used_in_outer
9501 || relevance == vect_used_in_outer_by_reduction
9502 || relevance == vect_used_by_reduction
b28ead45
AH
9503 || relevance == vect_unused_in_scope
9504 || relevance == vect_used_only_live));
8644a673
IR
9505 break;
9506
9507 case vect_induction_def:
e7baeb39
RB
9508 gcc_assert (!bb_vinfo);
9509 break;
9510
8644a673
IR
9511 case vect_constant_def:
9512 case vect_external_def:
9513 case vect_unknown_def_type:
9514 default:
9515 gcc_unreachable ();
9516 }
ebfd146a 9517
8644a673 9518 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9519 {
8644a673 9520 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9521 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9522 || (is_gimple_call (stmt)
9523 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9524 *need_to_vectorize = true;
ebfd146a
IR
9525 }
9526
b1af7da6
RB
9527 if (PURE_SLP_STMT (stmt_info) && !node)
9528 {
9529 dump_printf_loc (MSG_NOTE, vect_location,
9530 "handled only by SLP analysis\n");
9531 return true;
9532 }
9533
9534 ok = true;
9535 if (!bb_vinfo
9536 && (STMT_VINFO_RELEVANT_P (stmt_info)
9537 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2
RB
9538 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9539 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9540 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9541 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9542 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9543 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9544 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9545 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9546 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9547 cost_vec)
9548 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9549 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9550 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6
RB
9551 else
9552 {
9553 if (bb_vinfo)
68435eb2
RB
9554 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9555 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9556 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9557 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9558 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9559 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9560 cost_vec)
9561 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9562 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9563 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9564 cost_vec)
9565 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9566 cost_vec));
b1af7da6 9567 }
8644a673
IR
9568
9569 if (!ok)
ebfd146a 9570 {
73fbfcad 9571 if (dump_enabled_p ())
8644a673 9572 {
78c60e3d
SS
9573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9574 "not vectorized: relevant stmt not ");
9575 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9576 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9577 }
b8698a0f 9578
ebfd146a
IR
9579 return false;
9580 }
9581
8644a673
IR
9582 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9583 need extra handling, except for vectorizable reductions. */
68435eb2
RB
9584 if (!bb_vinfo
9585 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9586 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9587 {
73fbfcad 9588 if (dump_enabled_p ())
8644a673 9589 {
78c60e3d 9590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9591 "not vectorized: live stmt not supported: ");
78c60e3d 9592 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9593 }
b8698a0f 9594
8644a673 9595 return false;
ebfd146a
IR
9596 }
9597
ebfd146a
IR
9598 return true;
9599}
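
/* Illustrative sketch only (not part of the original file): a hypothetical
   driver showing how an analysis phase might invoke vect_analyze_stmt for
   every statement of a basic block.  It assumes stmt_vec_infos have already
   been created; the real callers live in tree-vect-loop.c and
   tree-vect-slp.c and do considerably more bookkeeping.  */

static bool
example_analyze_bb_stmts (basic_block bb, stmt_vector_for_cost *cost_vec)
{
  bool need_to_vectorize = false;
  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
       gsi_next (&si))
    /* NULL slp_tree/slp_instance: analyze the statement on its own.  */
    if (!vect_analyze_stmt (gsi_stmt (si), &need_to_vectorize,
                            NULL, NULL, cost_vec))
      return false;
  return need_to_vectorize;
}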
9600
9601
9602/* Function vect_transform_stmt.
9603
9604 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9605
9606bool
355fe088 9607vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9608 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9609 slp_instance slp_node_instance)
9610{
6585ff8f
RS
9611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9612 vec_info *vinfo = stmt_info->vinfo;
ebfd146a 9613 bool is_store = false;
355fe088 9614 gimple *vec_stmt = NULL;
ebfd146a 9615 bool done;
ebfd146a 9616
fce57248 9617 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9618 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9619
e57d9a82
RB
9620 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9621 && nested_in_vect_loop_p
9622 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9623 stmt));
9624
ebfd146a
IR
9625 switch (STMT_VINFO_TYPE (stmt_info))
9626 {
9627 case type_demotion_vec_info_type:
ebfd146a 9628 case type_promotion_vec_info_type:
ebfd146a 9629 case type_conversion_vec_info_type:
68435eb2 9630 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9631 gcc_assert (done);
9632 break;
9633
9634 case induc_vec_info_type:
68435eb2 9635 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9636 gcc_assert (done);
9637 break;
9638
9dc3f7de 9639 case shift_vec_info_type:
68435eb2 9640 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de
IR
9641 gcc_assert (done);
9642 break;
9643
ebfd146a 9644 case op_vec_info_type:
68435eb2 9645 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9646 gcc_assert (done);
9647 break;
9648
9649 case assignment_vec_info_type:
68435eb2 9650 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9651 gcc_assert (done);
9652 break;
9653
9654 case load_vec_info_type:
b8698a0f 9655 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9656 slp_node_instance, NULL);
ebfd146a
IR
9657 gcc_assert (done);
9658 break;
9659
9660 case store_vec_info_type:
68435eb2 9661 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9662 gcc_assert (done);
0d0293ac 9663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9664 {
9665 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9666 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
9667 one are skipped, and their vec_stmt_info shouldn't be freed
9668 meanwhile. */
0d0293ac 9669 *grouped_store = true;
f307441a 9670 stmt_vec_info group_info
2c53b149
RB
9671 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9672 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9673 is_store = true;
f307441a 9674 }
ebfd146a
IR
9675 else
9676 is_store = true;
9677 break;
9678
9679 case condition_vec_info_type:
68435eb2 9680 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a
IR
9681 gcc_assert (done);
9682 break;
9683
42fd8198 9684 case comparison_vec_info_type:
68435eb2 9685 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198
IE
9686 gcc_assert (done);
9687 break;
9688
ebfd146a 9689 case call_vec_info_type:
68435eb2 9690 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9691 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9692 break;
9693
0136f8f0 9694 case call_simd_clone_vec_info_type:
68435eb2 9695 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0
AH
9696 stmt = gsi_stmt (*gsi);
9697 break;
9698
ebfd146a 9699 case reduc_vec_info_type:
891ad31c 9700 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9701 slp_node_instance, NULL);
ebfd146a
IR
9702 gcc_assert (done);
9703 break;
9704
9705 default:
9706 if (!STMT_VINFO_LIVE_P (stmt_info))
9707 {
73fbfcad 9708 if (dump_enabled_p ())
78c60e3d 9709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9710 "stmt not supported.\n");
ebfd146a
IR
9711 gcc_unreachable ();
9712 }
9713 }
9714
225ce44b
RB
9715 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9716 This would break hybrid SLP vectorization. */
9717 if (slp_node)
d90f8440
RB
9718 gcc_assert (!vec_stmt
9719 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9720
ebfd146a
IR
9721 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9722 is being vectorized, but outside the immediately enclosing loop. */
9723 if (vec_stmt
e57d9a82 9724 && nested_p
ebfd146a
IR
9725 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9726 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9727 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9728 vect_used_in_outer_by_reduction))
ebfd146a 9729 {
a70d6342
IR
9730 struct loop *innerloop = LOOP_VINFO_LOOP (
9731 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9732 imm_use_iterator imm_iter;
9733 use_operand_p use_p;
9734 tree scalar_dest;
ebfd146a 9735
73fbfcad 9736 if (dump_enabled_p ())
78c60e3d 9737 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9738 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9739
9740 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9741 (to be used when vectorizing outer-loop stmts that use the DEF of
9742 STMT). */
9743 if (gimple_code (stmt) == GIMPLE_PHI)
9744 scalar_dest = PHI_RESULT (stmt);
9745 else
9746 scalar_dest = gimple_assign_lhs (stmt);
9747
9748 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6585ff8f
RS
9749 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9750 {
9751 stmt_vec_info exit_phi_info
9752 = vinfo->lookup_stmt (USE_STMT (use_p));
9753 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9754 }
ebfd146a
IR
9755 }
9756
9757 /* Handle stmts whose DEF is used outside the loop-nest that is
9758 being vectorized. */
68a0f2ff 9759 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9760 {
68435eb2 9761 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a
IR
9762 gcc_assert (done);
9763 }
9764
9765 if (vec_stmt)
83197f37 9766 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9767
b8698a0f 9768 return is_store;
ebfd146a
IR
9769}
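
/* Illustrative sketch only (not part of the original file): a hypothetical
   caller that vectorizes the statement at GSI and, once the last store of
   an interleaving chain has been transformed, removes the now-dead scalar
   stores.  This mirrors, in simplified form, what vect_transform_loop
   does.  */

static void
example_transform_one_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  bool grouped_store = false;
  bool is_store = vect_transform_stmt (stmt, gsi, &grouped_store,
                                       NULL, NULL);
  if (is_store && grouped_store)
    /* The whole chain has been vectorized; drop the scalar stores.  */
    vect_remove_stores (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
}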
9770
9771
b8698a0f 9772/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9773 stmt_vec_info. */
9774
9775void
355fe088 9776vect_remove_stores (gimple *first_stmt)
ebfd146a 9777{
355fe088
TS
9778 gimple *next = first_stmt;
9779 gimple *tmp;
ebfd146a
IR
9780 gimple_stmt_iterator next_si;
9781
9782 while (next)
9783 {
78048b1c
JJ
9784 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9785
2c53b149 9786 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
78048b1c
JJ
9787 if (is_pattern_stmt_p (stmt_info))
9788 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9789 /* Free the attached stmt_vec_info and remove the stmt. */
9790 next_si = gsi_for_stmt (next);
3d3f2249 9791 unlink_stmt_vdef (next);
ebfd146a 9792 gsi_remove (&next_si, true);
3d3f2249 9793 release_defs (next);
ebfd146a
IR
9794 free_stmt_vec_info (next);
9795 next = tmp;
9796 }
9797}
9798
9799
9800/* Function new_stmt_vec_info.
9801
9802 Create and initialize a new stmt_vec_info struct for STMT. */
9803
9804stmt_vec_info
310213d4 9805new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9806{
9807 stmt_vec_info res;
9808 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9809
9810 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9811 STMT_VINFO_STMT (res) = stmt;
310213d4 9812 res->vinfo = vinfo;
8644a673 9813 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9814 STMT_VINFO_LIVE_P (res) = false;
9815 STMT_VINFO_VECTYPE (res) = NULL;
9816 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9817 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9818 STMT_VINFO_IN_PATTERN_P (res) = false;
9819 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9820 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9821 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9822 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9823 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9824
ebfd146a
IR
9825 if (gimple_code (stmt) == GIMPLE_PHI
9826 && is_loop_header_bb_p (gimple_bb (stmt)))
9827 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9828 else
8644a673
IR
9829 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9830
9771b263 9831 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9832 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9833 STMT_VINFO_NUM_SLP_USES (res) = 0;
9834
2c53b149
RB
9835 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9836 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9837 res->size = 0; /* GROUP_SIZE */
9838 res->store_count = 0; /* GROUP_STORE_COUNT */
9839 res->gap = 0; /* GROUP_GAP */
9840 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a 9841
ca823c85
RB
9842 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9843 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9844
ebfd146a
IR
9845 return res;
9846}
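
/* Illustrative sketch only (not part of the original file): how a fresh
   stmt_vec_info is typically created and attached to a statement during
   the vectorizer's bookkeeping.  */

static void
example_attach_stmt_vinfo (gimple *stmt, vec_info *vinfo)
{
  set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, vinfo));
}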
9847
9848
f8c0baaf 9849/* Set the current stmt_vec_info vector to V. */
ebfd146a
IR
9850
9851void
f8c0baaf 9852set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9853{
f8c0baaf 9854 stmt_vec_info_vec = v;
ebfd146a
IR
9855}
9856
f8c0baaf 9857/* Free the stmt_vec_info entries in V and release V. */
ebfd146a
IR
9858
9859void
f8c0baaf 9860free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9861{
93675444 9862 unsigned int i;
3161455c 9863 stmt_vec_info info;
f8c0baaf 9864 FOR_EACH_VEC_ELT (*v, i, info)
93675444 9865 if (info != NULL)
3161455c 9866 free_stmt_vec_info (STMT_VINFO_STMT (info));
f8c0baaf
RB
9867 if (v == stmt_vec_info_vec)
9868 stmt_vec_info_vec = NULL;
9869 v->release ();
ebfd146a
IR
9870}
9871
9872
9873/* Free stmt vectorization related info. */
9874
9875void
355fe088 9876free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9877{
9878 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9879
9880 if (!stmt_info)
9881 return;
9882
78048b1c
JJ
9883 /* Check if this statement has a related "pattern stmt"
9884 (introduced by the vectorizer during the pattern recognition
9885 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9886 too. */
9887 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9888 {
e3947d80
RS
9889 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9890 for (gimple_stmt_iterator si = gsi_start (seq);
9891 !gsi_end_p (si); gsi_next (&si))
9892 {
9893 gimple *seq_stmt = gsi_stmt (si);
9894 gimple_set_bb (seq_stmt, NULL);
9895 tree lhs = gimple_get_lhs (seq_stmt);
9896 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9897 release_ssa_name (lhs);
9898 free_stmt_vec_info (seq_stmt);
9899 }
78048b1c
JJ
9900 stmt_vec_info patt_info
9901 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9902 if (patt_info)
9903 {
355fe088 9904 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9905 gimple_set_bb (patt_stmt, NULL);
9906 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9907 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9908 release_ssa_name (lhs);
f0281fde 9909 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9910 }
9911 }
9912
9771b263 9913 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9914 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9915 set_vinfo_for_stmt (stmt, NULL);
9916 free (stmt_info);
9917}
9918
9919
bb67d9c7 9920/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9921
bb67d9c7 9922 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9923 by the target. */
9924
c803b2a9 9925tree
86e36728 9926get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9927{
c7d97b28 9928 tree orig_scalar_type = scalar_type;
3bd8f481 9929 scalar_mode inner_mode;
ef4bddc2 9930 machine_mode simd_mode;
86e36728 9931 poly_uint64 nunits;
ebfd146a
IR
9932 tree vectype;
9933
3bd8f481
RS
9934 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9935 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9936 return NULL_TREE;
9937
3bd8f481 9938 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9939
7b7b1813
RG
9940 /* For vector types of elements whose mode precision doesn't
9941 match their type's precision we use an element type of mode
9942 precision. The vectorization routines will have to make sure
48f2e373
RB
9943 they support the proper result truncation/extension.
9944 We also make sure to build vector types with INTEGER_TYPE
9945 component type only. */
6d7971b8 9946 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9947 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9948 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9949 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9950 TYPE_UNSIGNED (scalar_type));
6d7971b8 9951
ccbf5bb4
RG
9952 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9953 When the component mode passes the above test simply use a type
9954 corresponding to that mode. The theory is that any use that
9955 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9956 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9957 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9958 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9959
9960 /* We can't build a vector type of elements with alignment bigger than
9961 their size. */
dfc2e2ac 9962 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9963 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9964 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9965
dfc2e2ac
RB
9966 /* If we fell back to using the mode, fail if there was
9967 no scalar type for it. */
9968 if (scalar_type == NULL_TREE)
9969 return NULL_TREE;
9970
bb67d9c7
RG
9971 /* If no size was supplied, use the mode the target prefers. Otherwise
9972 look up a vector mode of the specified size. */
86e36728 9973 if (known_eq (size, 0U))
bb67d9c7 9974 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9975 else if (!multiple_p (size, nbytes, &nunits)
9976 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9977 return NULL_TREE;
4c8fd8ac 9978 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9979 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9980 return NULL_TREE;
ebfd146a
IR
9981
9982 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9983
9984 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9985 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9986 return NULL_TREE;
ebfd146a 9987
c7d97b28
RB
9988 /* Re-attach the address-space qualifier if we canonicalized the scalar
9989 type. */
9990 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9991 return build_qualified_type
9992 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9993
ebfd146a
IR
9994 return vectype;
9995}
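
/* Illustrative sketch only (not part of the original file): with a 16-byte
   vector size a 32-bit int maps to a 4-unit vector type (16 / 4 elements),
   assuming the target provides such a vector mode; otherwise the function
   returns NULL_TREE.  */

static tree
example_int_vectype_for_16_bytes (void)
{
  return get_vectype_for_scalar_type_and_size (integer_type_node, 16);
}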
9996
86e36728 9997poly_uint64 current_vector_size;
bb67d9c7
RG
9998
9999/* Function get_vectype_for_scalar_type.
10000
10001 Returns the vector type corresponding to SCALAR_TYPE as supported
10002 by the target. */
10003
10004tree
10005get_vectype_for_scalar_type (tree scalar_type)
10006{
10007 tree vectype;
10008 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10009 current_vector_size);
10010 if (vectype
86e36728 10011 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
10012 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10013 return vectype;
10014}
10015
42fd8198
IE
10016/* Function get_mask_type_for_scalar_type.
10017
10018 Returns the mask type corresponding to a result of comparison
10019 of vectors of specified SCALAR_TYPE as supported by target. */
10020
10021tree
10022get_mask_type_for_scalar_type (tree scalar_type)
10023{
10024 tree vectype = get_vectype_for_scalar_type (scalar_type);
10025
10026 if (!vectype)
10027 return NULL;
10028
10029 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10030 current_vector_size);
10031}
10032
b690cc0f
RG
10033/* Function get_same_sized_vectype
10034
10035 Returns a vector type corresponding to SCALAR_TYPE with the same
10036 size as VECTOR_TYPE, if supported by the target. */
10037
10038tree
bb67d9c7 10039get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10040{
2568d8a1 10041 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10042 return build_same_sized_truth_vector_type (vector_type);
10043
bb67d9c7
RG
10044 return get_vectype_for_scalar_type_and_size
10045 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10046}
10047
ebfd146a
IR
10048/* Function vect_is_simple_use.
10049
10050 Input:
81c40241
RB
10051 VINFO - the vect info of the loop or basic block that is being vectorized.
10052 OPERAND - operand in the loop or bb.
10053 Output:
894dd753 10054 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
81c40241 10055 DT - the type of definition
ebfd146a
IR
10056
10057 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10058 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10059 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10060 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
10061 is the case in reduction/induction computations).
10062 For basic blocks, supportable operands are constants and bb invariants.
10063 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
10064
10065bool
894dd753
RS
10066vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10067 gimple **def_stmt_out)
b8698a0f 10068{
894dd753
RS
10069 if (def_stmt_out)
10070 *def_stmt_out = NULL;
3fc356dc 10071 *dt = vect_unknown_def_type;
b8698a0f 10072
73fbfcad 10073 if (dump_enabled_p ())
ebfd146a 10074 {
78c60e3d
SS
10075 dump_printf_loc (MSG_NOTE, vect_location,
10076 "vect_is_simple_use: operand ");
30f502ed
RB
10077 if (TREE_CODE (operand) == SSA_NAME
10078 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10079 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10080 else
10081 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 10082 }
b8698a0f 10083
b758f602 10084 if (CONSTANT_CLASS_P (operand))
30f502ed
RB
10085 *dt = vect_constant_def;
10086 else if (is_gimple_min_invariant (operand))
10087 *dt = vect_external_def;
10088 else if (TREE_CODE (operand) != SSA_NAME)
10089 *dt = vect_unknown_def_type;
10090 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
8644a673 10091 *dt = vect_external_def;
ebfd146a
IR
10092 else
10093 {
30f502ed 10094 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
c98d0595
RS
10095 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10096 if (!stmt_vinfo)
30f502ed
RB
10097 *dt = vect_external_def;
10098 else
0f8c840c 10099 {
30f502ed
RB
10100 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10101 {
10102 def_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10103 stmt_vinfo = vinfo_for_stmt (def_stmt);
10104 }
10105 switch (gimple_code (def_stmt))
10106 {
10107 case GIMPLE_PHI:
10108 case GIMPLE_ASSIGN:
10109 case GIMPLE_CALL:
10110 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10111 break;
10112 default:
10113 *dt = vect_unknown_def_type;
10114 break;
10115 }
0f8c840c 10116 }
30f502ed
RB
10117 if (def_stmt_out)
10118 *def_stmt_out = def_stmt;
ebfd146a
IR
10119 }
10120
2e8ab70c
RB
10121 if (dump_enabled_p ())
10122 {
30f502ed 10123 dump_printf (MSG_NOTE, ", type of def: ");
2e8ab70c
RB
10124 switch (*dt)
10125 {
10126 case vect_uninitialized_def:
10127 dump_printf (MSG_NOTE, "uninitialized\n");
10128 break;
10129 case vect_constant_def:
10130 dump_printf (MSG_NOTE, "constant\n");
10131 break;
10132 case vect_external_def:
10133 dump_printf (MSG_NOTE, "external\n");
10134 break;
10135 case vect_internal_def:
10136 dump_printf (MSG_NOTE, "internal\n");
10137 break;
10138 case vect_induction_def:
10139 dump_printf (MSG_NOTE, "induction\n");
10140 break;
10141 case vect_reduction_def:
10142 dump_printf (MSG_NOTE, "reduction\n");
10143 break;
10144 case vect_double_reduction_def:
10145 dump_printf (MSG_NOTE, "double reduction\n");
10146 break;
10147 case vect_nested_cycle:
10148 dump_printf (MSG_NOTE, "nested cycle\n");
10149 break;
10150 case vect_unknown_def_type:
10151 dump_printf (MSG_NOTE, "unknown\n");
10152 break;
10153 }
10154 }
10155
81c40241 10156 if (*dt == vect_unknown_def_type)
ebfd146a 10157 {
73fbfcad 10158 if (dump_enabled_p ())
78c60e3d 10159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10160 "Unsupported pattern.\n");
ebfd146a
IR
10161 return false;
10162 }
10163
ebfd146a
IR
10164 return true;
10165}
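
/* Illustrative sketch only (not part of the original file): a hypothetical
   check that both rhs operands of a binary assignment are "simple" uses,
   i.e. constants, invariants or defs belonging to the region being
   vectorized.  STMT is assumed to be a binary GIMPLE_ASSIGN.  */

static bool
example_binary_operands_are_simple (gimple *stmt, vec_info *vinfo)
{
  enum vect_def_type dt;
  gimple *def_stmt;
  return (is_gimple_assign (stmt)
          && vect_is_simple_use (gimple_assign_rhs1 (stmt), vinfo, &dt,
                                 &def_stmt)
          && vect_is_simple_use (gimple_assign_rhs2 (stmt), vinfo, &dt,
                                 &def_stmt));
}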
10166
81c40241 10167/* Function vect_is_simple_use.
b690cc0f 10168
81c40241 10169 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10170 type of OPERAND and stores it to *VECTYPE. If the definition of
10171 OPERAND is vect_uninitialized_def, vect_constant_def or
10172 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10173 is responsible to compute the best suited vector type for the
10174 scalar operand. */
10175
10176bool
894dd753
RS
10177vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10178 tree *vectype, gimple **def_stmt_out)
b690cc0f 10179{
894dd753
RS
10180 gimple *def_stmt;
10181 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt))
b690cc0f
RG
10182 return false;
10183
894dd753
RS
10184 if (def_stmt_out)
10185 *def_stmt_out = def_stmt;
10186
b690cc0f
RG
10187 /* Now get a vector type if the def is internal, otherwise supply
10188 NULL_TREE and leave it up to the caller to figure out a proper
10189 type for the use stmt. */
10190 if (*dt == vect_internal_def
10191 || *dt == vect_induction_def
10192 || *dt == vect_reduction_def
10193 || *dt == vect_double_reduction_def
10194 || *dt == vect_nested_cycle)
10195 {
894dd753 10196 stmt_vec_info stmt_info = vinfo_for_stmt (def_stmt);
b690cc0f
RG
10197 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10198 gcc_assert (*vectype != NULL_TREE);
30f502ed
RB
10199 if (dump_enabled_p ())
10200 {
10201 dump_printf_loc (MSG_NOTE, vect_location,
10202 "vect_is_simple_use: vectype ");
10203 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10204 dump_printf (MSG_NOTE, "\n");
10205 }
b690cc0f
RG
10206 }
10207 else if (*dt == vect_uninitialized_def
10208 || *dt == vect_constant_def
10209 || *dt == vect_external_def)
10210 *vectype = NULL_TREE;
10211 else
10212 gcc_unreachable ();
10213
10214 return true;
10215}
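
/* Illustrative sketch only (not part of the original file): fetching the
   vector type of an operand with the overload above.  For constants and
   external defs *VECTYPE comes back as NULL_TREE and the caller has to
   choose a suitable type itself.  */

static tree
example_operand_vectype (tree op, vec_info *vinfo)
{
  enum vect_def_type dt;
  tree vectype;
  if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
    return NULL_TREE;
  return vectype;
}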
10216
ebfd146a
IR
10217
10218/* Function supportable_widening_operation
10219
b8698a0f
L
10220 Check whether an operation represented by the code CODE is a
10221 widening operation that is supported by the target platform in
b690cc0f
RG
10222 vector form (i.e., when operating on arguments of type VECTYPE_IN
10223 producing a result of type VECTYPE_OUT).
b8698a0f 10224
1bda738b
JJ
10225 Widening operations we currently support are NOP (CONVERT), FLOAT,
10226 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10227 are supported by the target platform either directly (via vector
10228 tree-codes), or via target builtins.
ebfd146a
IR
10229
10230 Output:
b8698a0f
L
10231 - CODE1 and CODE2 are codes of vector operations to be used when
10232 vectorizing the operation, if available.
ebfd146a
IR
10233 - MULTI_STEP_CVT determines the number of required intermediate steps in
10234 case of multi-step conversion (like char->short->int - in that case
10235 MULTI_STEP_CVT will be 1).
b8698a0f
L
10236 - INTERM_TYPES contains the intermediate type required to perform the
10237 widening operation (short in the above example). */
ebfd146a
IR
10238
10239bool
355fe088 10240supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10241 tree vectype_out, tree vectype_in,
ebfd146a
IR
10242 enum tree_code *code1, enum tree_code *code2,
10243 int *multi_step_cvt,
9771b263 10244 vec<tree> *interm_types)
ebfd146a
IR
10245{
10246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10247 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10248 struct loop *vect_loop = NULL;
ef4bddc2 10249 machine_mode vec_mode;
81f40b79 10250 enum insn_code icode1, icode2;
ebfd146a 10251 optab optab1, optab2;
b690cc0f
RG
10252 tree vectype = vectype_in;
10253 tree wide_vectype = vectype_out;
ebfd146a 10254 enum tree_code c1, c2;
4a00c761
JJ
10255 int i;
10256 tree prev_type, intermediate_type;
ef4bddc2 10257 machine_mode intermediate_mode, prev_mode;
4a00c761 10258 optab optab3, optab4;
ebfd146a 10259
4a00c761 10260 *multi_step_cvt = 0;
4ef69dfc
IR
10261 if (loop_info)
10262 vect_loop = LOOP_VINFO_LOOP (loop_info);
10263
ebfd146a
IR
10264 switch (code)
10265 {
10266 case WIDEN_MULT_EXPR:
6ae6116f
RH
10267 /* The result of a vectorized widening operation usually requires
10268 two vectors (because the widened results do not fit into one vector).
10269 The generated vector results would normally be expected to be
10270 generated in the same order as in the original scalar computation,
10271 i.e. if 8 results are generated in each vector iteration, they are
10272 to be organized as follows:
10273 vect1: [res1,res2,res3,res4],
10274 vect2: [res5,res6,res7,res8].
10275
10276 However, in the special case that the result of the widening
10277 operation is used in a reduction computation only, the order doesn't
10278 matter (because when vectorizing a reduction we change the order of
10279 the computation). Some targets can take advantage of this and
10280 generate more efficient code. For example, targets like Altivec,
10281 that support widen_mult using a sequence of {mult_even,mult_odd}
10282 generate the following vectors:
10283 vect1: [res1,res3,res5,res7],
10284 vect2: [res2,res4,res6,res8].
10285
10286 When vectorizing outer-loops, we execute the inner-loop sequentially
10287 (each vectorized inner-loop iteration contributes to VF outer-loop
10288 iterations in parallel). We therefore don't allow changing the
10289 order of the computation in the inner-loop during outer-loop
10290 vectorization. */
10291 /* TODO: Another case in which order doesn't *really* matter is when we
10292 widen and then contract again, e.g. (short)((int)x * y >> 8).
10293 Normally, pack_trunc performs an even/odd permute, whereas the
10294 repack from an even/odd expansion would be an interleave, which
10295 would be significantly simpler for e.g. AVX2. */
10296 /* In any case, in order to avoid duplicating the code below, recurse
10297 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10298 are properly set up for the caller. If we fail, we'll continue with
10299 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10300 if (vect_loop
10301 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10302 && !nested_in_vect_loop_p (vect_loop, stmt)
10303 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10304 stmt, vectype_out, vectype_in,
a86ec597
RH
10305 code1, code2, multi_step_cvt,
10306 interm_types))
ebc047a2
CH
10307 {
10308 /* Elements in a vector with vect_used_by_reduction property cannot
10309 be reordered if the use chain with this property does not have the
10310 same operation. One such example is s += a * b, where elements
10311 in a and b cannot be reordered. Here we check if the vector defined
10312 by STMT is only directly used in the reduction statement. */
10313 tree lhs = gimple_assign_lhs (stmt);
10314 use_operand_p dummy;
355fe088 10315 gimple *use_stmt;
ebc047a2
CH
10316 stmt_vec_info use_stmt_info = NULL;
10317 if (single_imm_use (lhs, &dummy, &use_stmt)
10318 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10319 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10320 return true;
10321 }
4a00c761
JJ
10322 c1 = VEC_WIDEN_MULT_LO_EXPR;
10323 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10324 break;
10325
81c40241
RB
10326 case DOT_PROD_EXPR:
10327 c1 = DOT_PROD_EXPR;
10328 c2 = DOT_PROD_EXPR;
10329 break;
10330
10331 case SAD_EXPR:
10332 c1 = SAD_EXPR;
10333 c2 = SAD_EXPR;
10334 break;
10335
6ae6116f
RH
10336 case VEC_WIDEN_MULT_EVEN_EXPR:
10337 /* Support the recursion induced just above. */
10338 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10339 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10340 break;
10341
36ba4aae 10342 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10343 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10344 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10345 break;
10346
ebfd146a 10347 CASE_CONVERT:
4a00c761
JJ
10348 c1 = VEC_UNPACK_LO_EXPR;
10349 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10350 break;
10351
10352 case FLOAT_EXPR:
4a00c761
JJ
10353 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10354 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10355 break;
10356
10357 case FIX_TRUNC_EXPR:
1bda738b
JJ
10358 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10359 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10360 break;
ebfd146a
IR
10361
10362 default:
10363 gcc_unreachable ();
10364 }
10365
6ae6116f 10366 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10367 std::swap (c1, c2);
4a00c761 10368
ebfd146a
IR
10369 if (code == FIX_TRUNC_EXPR)
10370 {
10371 /* The signedness is determined from output operand. */
b690cc0f
RG
10372 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10373 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10374 }
10375 else
10376 {
10377 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10378 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10379 }
10380
10381 if (!optab1 || !optab2)
10382 return false;
10383
10384 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10385 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10386 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10387 return false;
10388
4a00c761
JJ
10389 *code1 = c1;
10390 *code2 = c2;
10391
10392 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10393 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10394 /* For scalar masks we may have different boolean
10395 vector types having the same QImode. Thus we
10396 add additional check for elements number. */
10397 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10398 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10399 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10400
b8698a0f 10401 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10402 types. */
ebfd146a 10403
4a00c761
JJ
10404 prev_type = vectype;
10405 prev_mode = vec_mode;
b8698a0f 10406
4a00c761
JJ
10407 if (!CONVERT_EXPR_CODE_P (code))
10408 return false;
b8698a0f 10409
4a00c761
JJ
10410 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10411 intermediate steps in promotion sequence. We try
10412 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10413 not. */
9771b263 10414 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10415 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10416 {
10417 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10418 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10419 {
7cfb4d93 10420 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10421 if (intermediate_mode != TYPE_MODE (intermediate_type))
10422 return false;
10423 }
10424 else
10425 intermediate_type
10426 = lang_hooks.types.type_for_mode (intermediate_mode,
10427 TYPE_UNSIGNED (prev_type));
10428
4a00c761
JJ
10429 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10430 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10431
10432 if (!optab3 || !optab4
10433 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10434 || insn_data[icode1].operand[0].mode != intermediate_mode
10435 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10436 || insn_data[icode2].operand[0].mode != intermediate_mode
10437 || ((icode1 = optab_handler (optab3, intermediate_mode))
10438 == CODE_FOR_nothing)
10439 || ((icode2 = optab_handler (optab4, intermediate_mode))
10440 == CODE_FOR_nothing))
10441 break;
ebfd146a 10442
9771b263 10443 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10444 (*multi_step_cvt)++;
10445
10446 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10447 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10448 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10449 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10450 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10451
10452 prev_type = intermediate_type;
10453 prev_mode = intermediate_mode;
ebfd146a
IR
10454 }
10455
9771b263 10456 interm_types->release ();
4a00c761 10457 return false;
ebfd146a
IR
10458}
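
/* Illustrative sketch only (not part of the original file): a hypothetical
   query for a two-step widening, e.g. unpacking a vector of chars all the
   way to a vector of ints.  On success *MULTI_STEP_CVT would be 1 and
   INTERM_TYPES would hold the intermediate short vector type.  */

static bool
example_query_char_to_int_widening (gimple *stmt, tree char_vectype,
                                    tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
                                            char_vectype, &code1, &code2,
                                            &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}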
10459
10460
10461/* Function supportable_narrowing_operation
10462
b8698a0f
L
10463 Check whether an operation represented by the code CODE is a
10464 narrowing operation that is supported by the target platform in
b690cc0f
RG
10465 vector form (i.e., when operating on arguments of type VECTYPE_IN
10466 and producing a result of type VECTYPE_OUT).
b8698a0f 10467
1bda738b
JJ
10468 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10469 and FLOAT. This function checks if these operations are supported by
ebfd146a
IR
10470 the target platform directly via vector tree-codes.
10471
10472 Output:
b8698a0f
L
10473 - CODE1 is the code of a vector operation to be used when
10474 vectorizing the operation, if available.
ebfd146a
IR
10475 - MULTI_STEP_CVT determines the number of required intermediate steps in
10476 case of multi-step conversion (like int->short->char - in that case
10477 MULTI_STEP_CVT will be 1).
10478 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10479 narrowing operation (short in the above example). */
ebfd146a
IR
10480
10481bool
10482supportable_narrowing_operation (enum tree_code code,
b690cc0f 10483 tree vectype_out, tree vectype_in,
ebfd146a 10484 enum tree_code *code1, int *multi_step_cvt,
9771b263 10485 vec<tree> *interm_types)
ebfd146a 10486{
ef4bddc2 10487 machine_mode vec_mode;
ebfd146a
IR
10488 enum insn_code icode1;
10489 optab optab1, interm_optab;
b690cc0f
RG
10490 tree vectype = vectype_in;
10491 tree narrow_vectype = vectype_out;
ebfd146a 10492 enum tree_code c1;
3ae0661a 10493 tree intermediate_type, prev_type;
ef4bddc2 10494 machine_mode intermediate_mode, prev_mode;
ebfd146a 10495 int i;
4a00c761 10496 bool uns;
ebfd146a 10497
4a00c761 10498 *multi_step_cvt = 0;
ebfd146a
IR
10499 switch (code)
10500 {
10501 CASE_CONVERT:
10502 c1 = VEC_PACK_TRUNC_EXPR;
10503 break;
10504
10505 case FIX_TRUNC_EXPR:
10506 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10507 break;
10508
10509 case FLOAT_EXPR:
1bda738b
JJ
10510 c1 = VEC_PACK_FLOAT_EXPR;
10511 break;
ebfd146a
IR
10512
10513 default:
10514 gcc_unreachable ();
10515 }
10516
10517 if (code == FIX_TRUNC_EXPR)
10518 /* The signedness is determined from output operand. */
b690cc0f 10519 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10520 else
10521 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10522
10523 if (!optab1)
10524 return false;
10525
10526 vec_mode = TYPE_MODE (vectype);
947131ba 10527 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10528 return false;
10529
4a00c761
JJ
10530 *code1 = c1;
10531
10532 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10533 /* For scalar masks we may have different boolean
10534 vector types having the same QImode. Thus we
10535 add additional check for elements number. */
10536 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10537 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10538 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10539
1bda738b
JJ
10540 if (code == FLOAT_EXPR)
10541 return false;
10542
ebfd146a
IR
10543 /* Check if it's a multi-step conversion that can be done using intermediate
10544 types. */
4a00c761 10545 prev_mode = vec_mode;
3ae0661a 10546 prev_type = vectype;
4a00c761
JJ
10547 if (code == FIX_TRUNC_EXPR)
10548 uns = TYPE_UNSIGNED (vectype_out);
10549 else
10550 uns = TYPE_UNSIGNED (vectype);
10551
10552 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10553 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10554 costly than signed. */
10555 if (code == FIX_TRUNC_EXPR && uns)
10556 {
10557 enum insn_code icode2;
10558
10559 intermediate_type
10560 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10561 interm_optab
10562 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10563 if (interm_optab != unknown_optab
4a00c761
JJ
10564 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10565 && insn_data[icode1].operand[0].mode
10566 == insn_data[icode2].operand[0].mode)
10567 {
10568 uns = false;
10569 optab1 = interm_optab;
10570 icode1 = icode2;
10571 }
10572 }
ebfd146a 10573
4a00c761
JJ
10574 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10575 intermediate steps in promotion sequence. We try
10576 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10577 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10578 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10579 {
10580 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10581 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10582 {
7cfb4d93 10583 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10584 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10585 return false;
3ae0661a
IE
10586 }
10587 else
10588 intermediate_type
10589 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10590 interm_optab
10591 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10592 optab_default);
10593 if (!interm_optab
10594 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10595 || insn_data[icode1].operand[0].mode != intermediate_mode
10596 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10597 == CODE_FOR_nothing))
10598 break;
10599
9771b263 10600 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10601 (*multi_step_cvt)++;
10602
10603 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10604 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10605 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10606 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10607
10608 prev_mode = intermediate_mode;
3ae0661a 10609 prev_type = intermediate_type;
4a00c761 10610 optab1 = interm_optab;
ebfd146a
IR
10611 }
10612
9771b263 10613 interm_types->release ();
4a00c761 10614 return false;
ebfd146a 10615}
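
/* Illustrative sketch only (not part of the original file): the
   mirror-image query for narrowing, e.g. packing a vector of ints down to
   a vector of chars through an intermediate short vector type; on success
   *MULTI_STEP_CVT reports the number of intermediate steps.  */

static bool
example_query_int_to_char_narrowing (tree char_vectype, tree int_vectype)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, char_vectype,
                                             int_vectype, &code1,
                                             &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}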
7cfb4d93
RS
10616
10617/* Generate and return a statement that sets vector mask MASK such that
10618 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10619
10620gcall *
10621vect_gen_while (tree mask, tree start_index, tree end_index)
10622{
10623 tree cmp_type = TREE_TYPE (start_index);
10624 tree mask_type = TREE_TYPE (mask);
10625 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10626 cmp_type, mask_type,
10627 OPTIMIZE_FOR_SPEED));
10628 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10629 start_index, end_index,
10630 build_zero_cst (mask_type));
10631 gimple_call_set_lhs (call, mask);
10632 return call;
10633}
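
/* Illustrative sketch only (not part of the original file): building the
   loop mask for a final, partial vector iteration.  With START_INDEX = I
   and END_INDEX = N the resulting mask enables exactly the lanes for which
   I + lane < N.  Assumes the target supports IFN_WHILE_ULT for
   MASK_TYPE.  */

static gimple *
example_build_loop_mask (tree mask_type, tree i, tree n)
{
  tree mask = make_ssa_name (mask_type);
  return vect_gen_while (mask, i, n);
}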
535e7c11
RS
10634
10635/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10636 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10637
10638tree
10639vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10640 tree end_index)
10641{
10642 tree tmp = make_ssa_name (mask_type);
10643 gcall *call = vect_gen_while (tmp, start_index, end_index);
10644 gimple_seq_add_stmt (seq, call);
10645 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10646}
1f3cb663
RS
10647
10648/* Try to compute the vector types required to vectorize STMT_INFO,
10649 returning true on success and false if vectorization isn't possible.
10650
10651 On success:
10652
10653 - Set *STMT_VECTYPE_OUT to:
10654 - NULL_TREE if the statement doesn't need to be vectorized;
10655 - boolean_type_node if the statement is a boolean operation whose
10656 vector type can only be determined once all the other vector types
10657 are known; and
10658 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10659
10660 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10661 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10662 statement does not help to determine the overall number of units. */
10663
10664bool
10665vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10666 tree *stmt_vectype_out,
10667 tree *nunits_vectype_out)
10668{
10669 gimple *stmt = stmt_info->stmt;
10670
10671 *stmt_vectype_out = NULL_TREE;
10672 *nunits_vectype_out = NULL_TREE;
10673
10674 if (gimple_get_lhs (stmt) == NULL_TREE
10675 /* MASK_STORE has no lhs, but is ok. */
10676 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10677 {
10678 if (is_a <gcall *> (stmt))
10679 {
10680 /* Ignore calls with no lhs. These must be calls to
10681 #pragma omp simd functions, and what vectorization factor
10682 it really needs can't be determined until
10683 vectorizable_simd_clone_call. */
10684 if (dump_enabled_p ())
10685 dump_printf_loc (MSG_NOTE, vect_location,
10686 "defer to SIMD clone analysis.\n");
10687 return true;
10688 }
10689
10690 if (dump_enabled_p ())
10691 {
10692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10693 "not vectorized: irregular stmt.");
10694 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10695 }
10696 return false;
10697 }
10698
10699 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10700 {
10701 if (dump_enabled_p ())
10702 {
10703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10704 "not vectorized: vector stmt in loop:");
10705 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10706 }
10707 return false;
10708 }
10709
10710 tree vectype;
10711 tree scalar_type = NULL_TREE;
10712 if (STMT_VINFO_VECTYPE (stmt_info))
10713 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10714 else
10715 {
10716 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10717 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10718 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10719 else
10720 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10721
10722 /* Pure bool ops don't participate in number-of-units computation.
10723 For comparisons use the types being compared. */
10724 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10725 && is_gimple_assign (stmt)
10726 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10727 {
10728 *stmt_vectype_out = boolean_type_node;
10729
10730 tree rhs1 = gimple_assign_rhs1 (stmt);
10731 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10732 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10733 scalar_type = TREE_TYPE (rhs1);
10734 else
10735 {
10736 if (dump_enabled_p ())
10737 dump_printf_loc (MSG_NOTE, vect_location,
10738 "pure bool operation.\n");
10739 return true;
10740 }
10741 }
10742
10743 if (dump_enabled_p ())
10744 {
10745 dump_printf_loc (MSG_NOTE, vect_location,
10746 "get vectype for scalar type: ");
10747 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10748 dump_printf (MSG_NOTE, "\n");
10749 }
10750 vectype = get_vectype_for_scalar_type (scalar_type);
10751 if (!vectype)
10752 {
10753 if (dump_enabled_p ())
10754 {
10755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10756 "not vectorized: unsupported data-type ");
10757 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10758 scalar_type);
10759 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10760 }
10761 return false;
10762 }
10763
10764 if (!*stmt_vectype_out)
10765 *stmt_vectype_out = vectype;
10766
10767 if (dump_enabled_p ())
10768 {
10769 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10770 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10771 dump_printf (MSG_NOTE, "\n");
10772 }
10773 }
10774
10775 /* Don't try to compute scalar types if the stmt produces a boolean
10776 vector; use the existing vector type instead. */
10777 tree nunits_vectype;
10778 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10779 nunits_vectype = vectype;
10780 else
10781 {
10782 /* The number of units is set according to the smallest scalar
10783 type (or the largest vector size, but we only support one
10784 vector size per vectorization). */
10785 if (*stmt_vectype_out != boolean_type_node)
10786 {
10787 HOST_WIDE_INT dummy;
10788 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10789 }
10790 if (dump_enabled_p ())
10791 {
10792 dump_printf_loc (MSG_NOTE, vect_location,
10793 "get vectype for scalar type: ");
10794 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10795 dump_printf (MSG_NOTE, "\n");
10796 }
10797 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10798 }
10799 if (!nunits_vectype)
10800 {
10801 if (dump_enabled_p ())
10802 {
10803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10804 "not vectorized: unsupported data-type ");
10805 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10806 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10807 }
10808 return false;
10809 }
10810
10811 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10812 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10813 {
10814 if (dump_enabled_p ())
10815 {
10816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10817 "not vectorized: different sized vector "
10818 "types in statement, ");
10819 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10820 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10821 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10822 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10823 }
10824 return false;
10825 }
10826
10827 if (dump_enabled_p ())
10828 {
10829 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10830 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10831 dump_printf (MSG_NOTE, "\n");
10832
10833 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10834 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10835 dump_printf (MSG_NOTE, "\n");
10836 }
10837
10838 *nunits_vectype_out = nunits_vectype;
10839 return true;
10840}
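
/* Illustrative sketch only (not part of the original file): a hypothetical
   wrapper showing how an analysis phase might use the function above,
   caching the statement vector type unless it is the deferred
   boolean_type_node marker.  */

static bool
example_set_stmt_vectype (stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
                                       &nunits_vectype))
    return false;
  if (stmt_vectype && stmt_vectype != boolean_type_node)
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
  return true;
}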
10841
10842/* Try to determine the correct vector type for STMT_INFO, which is a
10843 statement that produces a scalar boolean result. Return the vector
10844 type on success, otherwise return NULL_TREE. */
10845
10846tree
10847vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10848{
10849 gimple *stmt = stmt_info->stmt;
10850 tree mask_type = NULL;
10851 tree vectype, scalar_type;
10852
10853 if (is_gimple_assign (stmt)
10854 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10855 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10856 {
10857 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10858 mask_type = get_mask_type_for_scalar_type (scalar_type);
10859
10860 if (!mask_type)
10861 {
10862 if (dump_enabled_p ())
10863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10864 "not vectorized: unsupported mask\n");
10865 return NULL_TREE;
10866 }
10867 }
10868 else
10869 {
10870 tree rhs;
10871 ssa_op_iter iter;
1f3cb663
RS
10872 enum vect_def_type dt;
10873
10874 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10875 {
894dd753 10876 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
1f3cb663
RS
10877 {
10878 if (dump_enabled_p ())
10879 {
10880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10881 "not vectorized: can't compute mask type "
10882 "for statement, ");
10883 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10884 0);
10885 }
10886 return NULL_TREE;
10887 }
10888
10889 /* No vectype probably means external definition.
10890 Allow it in case there is another operand which
10891 allows us to determine the mask type. */
10892 if (!vectype)
10893 continue;
10894
10895 if (!mask_type)
10896 mask_type = vectype;
10897 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10898 TYPE_VECTOR_SUBPARTS (vectype)))
10899 {
10900 if (dump_enabled_p ())
10901 {
10902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10903 "not vectorized: different sized masks "
10904 "types in statement, ");
10905 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10906 mask_type);
10907 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10908 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10909 vectype);
10910 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10911 }
10912 return NULL_TREE;
10913 }
10914 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10915 != VECTOR_BOOLEAN_TYPE_P (vectype))
10916 {
10917 if (dump_enabled_p ())
10918 {
10919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10920 "not vectorized: mixed mask and "
10921 "nonmask vector types in statement, ");
10922 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10923 mask_type);
10924 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10925 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10926 vectype);
10927 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10928 }
10929 return NULL_TREE;
10930 }
10931 }
10932
10933 /* We may compare boolean value loaded as vector of integers.
10934 Fix mask_type in such case. */
10935 if (mask_type
10936 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10937 && gimple_code (stmt) == GIMPLE_ASSIGN
10938 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10939 mask_type = build_same_sized_truth_vector_type (mask_type);
10940 }
10941
10942 /* No mask_type should mean loop invariant predicate.
10943 This is probably a subject for optimization in if-conversion. */
10944 if (!mask_type && dump_enabled_p ())
10945 {
10946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10947 "not vectorized: can't compute mask type "
10948 "for statement, ");
10949 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10950 }
10951 return mask_type;
10952}